summaryrefslogtreecommitdiffstats
path: root/memory/replace/logalloc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /memory/replace/logalloc
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'memory/replace/logalloc')
-rw-r--r--memory/replace/logalloc/LogAlloc.cpp238
-rw-r--r--memory/replace/logalloc/README95
-rw-r--r--memory/replace/logalloc/moz.build36
-rw-r--r--memory/replace/logalloc/replay/Makefile.in48
-rw-r--r--memory/replace/logalloc/replay/Replay.cpp1159
-rw-r--r--memory/replace/logalloc/replay/expected_output_minimal.log17
-rw-r--r--memory/replace/logalloc/replay/logalloc_munge.py147
-rw-r--r--memory/replace/logalloc/replay/moz.build87
-rw-r--r--memory/replace/logalloc/replay/replay.log18
9 files changed, 1845 insertions, 0 deletions
diff --git a/memory/replace/logalloc/LogAlloc.cpp b/memory/replace/logalloc/LogAlloc.cpp
new file mode 100644
index 0000000000..a976b0c674
--- /dev/null
+++ b/memory/replace/logalloc/LogAlloc.cpp
@@ -0,0 +1,238 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <cstdlib>
+#include <fcntl.h>
+
+#ifdef _WIN32
+# include <windows.h>
+# include <io.h>
+# include <process.h>
+#else
+# include <unistd.h>
+# include <pthread.h>
+#endif
+
+#include "replace_malloc.h"
+#include "FdPrintf.h"
+#include "Mutex.h"
+
+static malloc_table_t sFuncs;
+static intptr_t sFd = 0;
+static bool sStdoutOrStderr = false;
+
+static Mutex sMutex MOZ_UNANNOTATED;
+
+#ifndef _WIN32
+static void prefork() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Lock(); }
+static void postfork_parent() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Unlock(); }
+static void postfork_child() { sMutex.Init(); }
+#endif
+
+static size_t GetPid() { return size_t(getpid()); }
+
+static size_t GetTid() {
+#if defined(_WIN32)
+ return size_t(GetCurrentThreadId());
+#else
+ return size_t(pthread_self());
+#endif
+}
+
+#ifdef ANDROID
+/* Android doesn't have pthread_atfork defined in pthread.h */
+extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void),
+ void (*)(void));
+#endif
+
+class LogAllocBridge : public ReplaceMallocBridge {
+ virtual void InitDebugFd(mozilla::DebugFdRegistry& aRegistry) override {
+ if (!sStdoutOrStderr) {
+ aRegistry.RegisterHandle(sFd);
+ }
+ }
+};
+
+/* Do a simple, text-form, log of all calls to replace-malloc functions.
+ * Use locking to guarantee that an allocation that did happen is logged
+ * before any other allocation/free happens.
+ */
+
+static void* replace_malloc(size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ void* ptr = sFuncs.malloc(aSize);
+ FdPrintf(sFd, "%zu %zu malloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr);
+ return ptr;
+}
+
+static int replace_posix_memalign(void** aPtr, size_t aAlignment,
+ size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ int ret = sFuncs.posix_memalign(aPtr, aAlignment, aSize);
+ FdPrintf(sFd, "%zu %zu posix_memalign(%zu,%zu)=%p\n", GetPid(), GetTid(),
+ aAlignment, aSize, (ret == 0) ? *aPtr : nullptr);
+ return ret;
+}
+
+static void* replace_aligned_alloc(size_t aAlignment, size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ void* ptr = sFuncs.aligned_alloc(aAlignment, aSize);
+ FdPrintf(sFd, "%zu %zu aligned_alloc(%zu,%zu)=%p\n", GetPid(), GetTid(),
+ aAlignment, aSize, ptr);
+ return ptr;
+}
+
+static void* replace_calloc(size_t aNum, size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ void* ptr = sFuncs.calloc(aNum, aSize);
+ FdPrintf(sFd, "%zu %zu calloc(%zu,%zu)=%p\n", GetPid(), GetTid(), aNum, aSize,
+ ptr);
+ return ptr;
+}
+
+static void* replace_realloc(void* aPtr, size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ void* new_ptr = sFuncs.realloc(aPtr, aSize);
+ FdPrintf(sFd, "%zu %zu realloc(%p,%zu)=%p\n", GetPid(), GetTid(), aPtr, aSize,
+ new_ptr);
+ return new_ptr;
+}
+
+static void replace_free(void* aPtr) {
+ MutexAutoLock lock(sMutex);
+ FdPrintf(sFd, "%zu %zu free(%p)\n", GetPid(), GetTid(), aPtr);
+ sFuncs.free(aPtr);
+}
+
+static void* replace_memalign(size_t aAlignment, size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ void* ptr = sFuncs.memalign(aAlignment, aSize);
+ FdPrintf(sFd, "%zu %zu memalign(%zu,%zu)=%p\n", GetPid(), GetTid(),
+ aAlignment, aSize, ptr);
+ return ptr;
+}
+
+static void* replace_valloc(size_t aSize) {
+ MutexAutoLock lock(sMutex);
+ void* ptr = sFuncs.valloc(aSize);
+ FdPrintf(sFd, "%zu %zu valloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr);
+ return ptr;
+}
+
+static void replace_jemalloc_stats(jemalloc_stats_t* aStats,
+ jemalloc_bin_stats_t* aBinStats) {
+ MutexAutoLock lock(sMutex);
+ sFuncs.jemalloc_stats_internal(aStats, aBinStats);
+ FdPrintf(sFd, "%zu %zu jemalloc_stats()\n", GetPid(), GetTid());
+}
+
+void replace_init(malloc_table_t* aTable, ReplaceMallocBridge** aBridge) {
+ /* Initialize output file descriptor from the MALLOC_LOG environment
+ * variable. Numbers up to 9999 are considered as a preopened file
+ * descriptor number. Other values are considered as a file name. */
+#ifdef _WIN32
+ wchar_t* log = _wgetenv(L"MALLOC_LOG");
+#else
+ char* log = getenv("MALLOC_LOG");
+#endif
+ if (log && *log) {
+ int fd = 0;
+ const auto* fd_num = log;
+ while (*fd_num) {
+ /* Reject non digits. */
+ if (*fd_num < '0' || *fd_num > '9') {
+ fd = -1;
+ break;
+ }
+ fd = fd * 10 + (*fd_num - '0');
+ /* Reject values >= 10000. */
+ if (fd >= 10000) {
+ fd = -1;
+ break;
+ }
+ fd_num++;
+ }
+ if (fd == 1 || fd == 2) {
+ sStdoutOrStderr = true;
+ }
+#ifdef _WIN32
+ // See comment in FdPrintf.h as to why CreateFile is used.
+ HANDLE handle;
+ if (fd > 0) {
+ handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+ } else {
+ handle =
+ CreateFileW(log, FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE,
+ nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);
+ }
+ if (handle != INVALID_HANDLE_VALUE) {
+ sFd = reinterpret_cast<intptr_t>(handle);
+ }
+#else
+ if (fd == -1) {
+ fd = open(log, O_WRONLY | O_CREAT | O_APPEND, 0644);
+ }
+ if (fd > 0) {
+ sFd = fd;
+ }
+#endif
+ }
+
+ // Don't initialize if we weren't passed a valid MALLOC_LOG.
+ if (sFd == 0) {
+ return;
+ }
+
+ sMutex.Init();
+ static LogAllocBridge bridge;
+ sFuncs = *aTable;
+#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
+#define MALLOC_DECL(name, ...) aTable->name = replace_##name;
+#include "malloc_decls.h"
+ aTable->jemalloc_stats_internal = replace_jemalloc_stats;
+ if (!getenv("MALLOC_LOG_MINIMAL")) {
+ aTable->posix_memalign = replace_posix_memalign;
+ aTable->aligned_alloc = replace_aligned_alloc;
+ aTable->valloc = replace_valloc;
+ }
+ *aBridge = &bridge;
+
+#ifndef _WIN32
+ /* When another thread has acquired a lock before forking, the child
+ * process will inherit the lock state but the thread, being nonexistent
+ * in the child process, will never release it, leading to a dead-lock
+ * whenever the child process gets the lock. We thus need to ensure no
+ * other thread is holding the lock before forking, by acquiring it
+ * ourselves, and releasing it after forking in the parent process and
+ * resetting it to its initial state in the child process. The latter is
+ * important because some implementations (notably macOS) prevent a lock from
+ * being unlocked by a different thread than the one which locked it in the
+ * first place.
+ * Windows doesn't have this problem since there is no fork().
+ * The real allocator, however, might be doing the same thing (jemalloc
+ * does). But pthread_atfork `prepare` handlers (first argument) are
+ * processed in reverse order they were established. But replace_init
+ * runs before the real allocator has had any chance to initialize and
+ * call pthread_atfork itself. This leads to its prefork running before
+ * ours. This leads to a race condition that can lead to a deadlock like
+ * the following:
+ * - thread A forks.
+ * - libc calls real allocator's prefork, so thread A holds the real
+ * allocator lock.
+ * - thread B calls malloc, which calls our replace_malloc.
+ * - consequently, thread B holds our lock.
+ * - thread B then proceeds to call the real allocator's malloc, and
+ * waits for the real allocator's lock, which thread A holds.
+ * - libc calls our prefork, so thread A waits for our lock, which
+ * thread B holds.
+ * To avoid this race condition, the real allocator's prefork must be
+ * called after ours, which means it needs to be registered before ours.
+ * So trick the real allocator into initializing itself without more side
+ * effects by calling malloc with a size it can't possibly allocate. */
+ sFuncs.malloc(-1);
+ pthread_atfork(prefork, postfork_parent, postfork_child);
+#endif
+}
diff --git a/memory/replace/logalloc/README b/memory/replace/logalloc/README
new file mode 100644
index 0000000000..c2e8cf66ce
--- /dev/null
+++ b/memory/replace/logalloc/README
@@ -0,0 +1,95 @@
+Logalloc is a replace-malloc library for Firefox (see
+memory/build/replace_malloc.h) that dumps a log of memory allocations to a
+given file descriptor or file name. That log can then be replayed against
+Firefox's default memory allocator independently or through another
+replace-malloc library, allowing the testing of other allocators under the
+exact same workload.
+
+To get an allocation log the following environment variable when starting
+Firefox:
+ MALLOC_LOG=/path/to/log-file
+ or
+ MALLOC_LOG=number
+
+When MALLOC_LOG is a number below 10000, it is considered as a file
+descriptor number that is fed to Firefox when it is started. Otherwise,
+it is considered as a file name.
+
+As those allocation logs can grow large quite quickly, it can be useful
+to pipe the output to a compression tool.
+
+MALLOC_LOG=1 would send to Firefox's stdout, MALLOC_LOG=2 would send to
+its stderr. Since in both cases that could be mixed with other output
+from Firefox, it is usually better to use another file descriptor
+by shell redirections, such as:
+
+ MALLOC_LOG=3 firefox 3>&1 1>&2 | gzip -c > log.gz
+
+(3>&1 copies the `| gzip` pipe file descriptor to file descriptor #3, 1>&2
+then copies stderr to stdout. This leads to: fd1 and fd2 sending to stderr
+of the parent process (the shell), and fd3 sending to gzip.)
+
+Each line of the allocations log is formatted as follows:
+ <pid> <tid> <function>([<args>])[=<result>]
+where <args> is a comma separated list of values. The number of <args> and
+the presence of <result> depend on the <function>.
+
+Example log:
+ 18545 18545 malloc(32)=0x7f90495120e0
+ 18545 18545 calloc(1,148)=0x7f9049537480
+ 18545 18545 realloc(0x7f90495120e0,64)=0x7f9049536680
+ 18545 18545 posix_memalign(256,240)=0x7f9049583300
+ 18545 18545 jemalloc_stats()
+ 18545 18545 free(0x7f9049536680)
+
+This log can be replayed with the logalloc-replay tool in
+memory/replace/logalloc/replay. However, as the goal of that tool is to
+reproduce the recorded memory allocations, it needs to avoid as much as
+possible doing its own allocations for bookkeeping. Reading the logs as
+they are would require data structures and memory allocations. As a
+consequence, the logs need to be preprocessed beforehand.
+
+The logalloc_munge.py script is responsible for that preprocessing. It simply
+takes a raw log on its stdin, and outputs the preprocessed log on its stdout.
+It replaces pointer addresses with indexes the logalloc-replay tool can use
+in a large (almost) linear array of allocation tracking slots (prefixed with
+'#'). It also replaces the pids with numbers starting from 1 (such as the
+first seen pid number is 1, the second is 2, etc.).
+
+The above example log would become the following, once preprocessed:
+ 1 1 malloc(32)=#1
+ 1 1 calloc(1,148)=#2
+ 1 1 realloc(#1,64)=#1
+ 1 1 posix_memalign(256,240)=#3
+ 1 1 jemalloc_stats()
+ 1 1 free(#1)
+
+The logalloc-replay tool then takes the preprocessed log on its stdin and
+replays the allocations printed there, but will only replay those with the
+same process id as the first line (which normally is 1).
+
+As the log files are simple text files, though, it is easy to separate out
+the different processes log with e.g. grep, and feed the separate processes
+logs to logalloc-replay.
+
+The logalloc-replay program won't output anything unless jemalloc_stats
+records appears in the log. You can expect those to be recorded when going
+to about:memory in Firefox, but they can also be added after preprocessing.
+
+Here is an example of what one can do:
+
+ gunzip -c log.gz | python logalloc_munge.py | \
+ awk '$1 == "2" { print $0 } !(NR % 10000) { print "2 1 jemalloc_stats()" }' | \
+ ./logalloc-replay
+
+The above command replays the allocations of process #2, with some stats
+output every 10000 records.
+
+The logalloc-replay tool itself being hooked with replace-malloc, it is possible
+to set LD_PRELOAD/DYLD_INSERT_LIBRARIES/MOZ_REPLACE_MALLOC_LIB and replay a log
+through a different allocator. For example:
+
+ LD_PRELOAD=libreplace_jemalloc.so logalloc-replay < log
+
+Will replay the log against jemalloc4 (which is, as of writing, what
+libreplace_jemalloc.so contains).
diff --git a/memory/replace/logalloc/moz.build b/memory/replace/logalloc/moz.build
new file mode 100644
index 0000000000..e0588cfb16
--- /dev/null
+++ b/memory/replace/logalloc/moz.build
@@ -0,0 +1,36 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ReplaceMalloc("logalloc")
+
+SOURCES += [
+ "LogAlloc.cpp",
+]
+
+# FdPrintf is statically linked if we're using static linking to mozjemalloc
+# and PHC is also compiled-in.
+if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"] or not CONFIG["MOZ_PHC"]:
+ SOURCES += [
+ "/memory/build/FdPrintf.cpp",
+ ]
+
+DisableStlWrapping()
+NO_PGO = True
+DEFINES["MOZ_NO_MOZALLOC"] = True
+
+LOCAL_INCLUDES += [
+ "/memory/build",
+]
+
+# Android doesn't have pthread_atfork, but we have our own in mozglue.
+if CONFIG["OS_TARGET"] == "Android" and FORCE_SHARED_LIB:
+ USE_LIBS += [
+ "mozglue",
+ ]
+
+DIRS += [
+ "replay",
+]
diff --git a/memory/replace/logalloc/replay/Makefile.in b/memory/replace/logalloc/replay/Makefile.in
new file mode 100644
index 0000000000..73659add98
--- /dev/null
+++ b/memory/replace/logalloc/replay/Makefile.in
@@ -0,0 +1,48 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ifdef MOZ_CODE_COVERAGE
+SKIP = 1
+endif
+
+ifdef CROSS_COMPILE
+SKIP = 1
+endif
+
+ifneq ($(SKIP),1)
+
+ifeq ($(OS_TARGET),WINNT)
+LOGALLOC_VAR = MOZ_REPLACE_MALLOC_LIB
+else
+ifeq ($(OS_TARGET),Darwin)
+LOGALLOC_VAR = DYLD_INSERT_LIBRARIES
+else
+LOGALLOC_VAR = LD_PRELOAD
+endif
+endif
+
+ifndef MOZ_REPLACE_MALLOC_STATIC
+LOGALLOC = $(LOGALLOC_VAR)=$(CURDIR)/../$(DLL_PREFIX)logalloc$(DLL_SUFFIX)
+endif
+
+expected_output.log: $(srcdir)/replay.log
+# The logalloc-replay program will only replay entries from the first pid,
+# so the expected output only contains entries beginning with "1 "
+ grep "^1 " $< > $@
+
+check:: $(srcdir)/replay.log expected_output.log $(srcdir)/expected_output_minimal.log
+# Test with MALLOC_LOG as a file descriptor number
+# We filter out anything happening before the first jemalloc_stats (first
+# command in replay.log) because starting with libstdc++ 5, a static
+# initializer in the STL allocates memory, which we obviously don't have
+# in expected_output.log.
+ MALLOC_LOG=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log
+# Test with MALLOC_LOG as a file name
+ $(RM) test_output.log
+ MALLOC_LOG=test_output.log $(LOGALLOC) ./$(PROGRAM) < $<
+ sed -n '/jemalloc_stats/,$$p' test_output.log | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log
+
+ MALLOC_LOG=1 MALLOC_LOG_MINIMAL=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - $(srcdir)/expected_output_minimal.log
+
+endif
diff --git a/memory/replace/logalloc/replay/Replay.cpp b/memory/replace/logalloc/replay/Replay.cpp
new file mode 100644
index 0000000000..b5ad0c540e
--- /dev/null
+++ b/memory/replace/logalloc/replay/Replay.cpp
@@ -0,0 +1,1159 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define MOZ_MEMORY_IMPL
+#include "mozmemory_wrap.h"
+
+#ifdef _WIN32
+# include <windows.h>
+# include <io.h>
+typedef intptr_t ssize_t;
+#else
+# include <sys/mman.h>
+# include <unistd.h>
+#endif
+#ifdef XP_LINUX
+# include <fcntl.h>
+# include <stdlib.h>
+#endif
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+#include "FdPrintf.h"
+
+using namespace mozilla;
+
+static void die(const char* message) {
+ /* Here, it doesn't matter that fprintf may allocate memory. */
+ fprintf(stderr, "%s\n", message);
+ exit(1);
+}
+
+#ifdef XP_LINUX
+static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }();
+#endif
+
+/* We don't want to be using malloc() to allocate our internal tracking
+ * data, because that would change the parameters of what is being measured,
+ * so we want to use data types that directly use mmap/VirtualAlloc. */
+template <typename T, size_t Len>
+class MappedArray {
+ public:
+ MappedArray() : mPtr(nullptr) {
+#ifdef XP_LINUX
+ MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)),
+ "MappedArray size must be a multiple of the page size");
+#endif
+ }
+
+ ~MappedArray() {
+ if (mPtr) {
+#ifdef _WIN32
+ VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE);
+#elif defined(XP_LINUX)
+ munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr) -
+ sPageSize),
+ sizeof(T) * Len + sPageSize * 2);
+#else
+ munmap(mPtr, sizeof(T) * Len);
+#endif
+ }
+ }
+
+ T& operator[](size_t aIndex) const {
+ if (mPtr) {
+ return mPtr[aIndex];
+ }
+
+#ifdef _WIN32
+ mPtr = reinterpret_cast<T*>(VirtualAlloc(
+ nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
+ if (mPtr == nullptr) {
+ die("VirtualAlloc error");
+ }
+#else
+ size_t data_size = sizeof(T) * Len;
+ size_t size = data_size;
+# ifdef XP_LINUX
+ // See below
+ size += sPageSize * 2;
+# endif
+ mPtr = reinterpret_cast<T*>(mmap(nullptr, size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0));
+ if (mPtr == MAP_FAILED) {
+ die("Mmap error");
+ }
+# ifdef XP_LINUX
+ // On Linux we request a page on either side of the allocation and
+ // mprotect them. This prevents mappings in /proc/self/smaps from being
+ // merged and allows us to parse this file to calculate the allocator's RSS.
+ MOZ_ASSERT(0 == mprotect(mPtr, sPageSize, 0));
+ MOZ_ASSERT(0 == mprotect(reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(mPtr) + data_size +
+ sPageSize),
+ sPageSize, 0));
+ mPtr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mPtr) + sPageSize);
+# endif
+#endif
+ return mPtr[aIndex];
+ }
+
+ bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; }
+
+ bool allocated() const { return !!mPtr; }
+
+ private:
+ mutable T* mPtr;
+};
+
+/* Type for records of allocations. */
+struct MemSlot {
+ void* mPtr;
+
+ // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared
+ // when memory is freed or realloc()ed.
+ size_t mRequest;
+};
+
+/* An almost infinite list of slots.
+ * In essence, this is a linked list of arrays of groups of slots.
+ * Each group is 1MB. On 64-bits, one group allows to store 64k allocations.
+ * Each MemSlotList instance can store 1023 such groups, which means more
+ * than 67M allocations. In case more would be needed, we chain to another
+ * MemSlotList, and so on.
+ * Using 1023 groups makes the MemSlotList itself page sized on 32-bits
+ * and 2 pages-sized on 64-bits.
+ */
+class MemSlotList {
+ static constexpr size_t kGroups = 1024 - 1;
+ static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot);
+
+ MappedArray<MemSlot, kGroupSize> mSlots[kGroups];
+ MappedArray<MemSlotList, 1> mNext;
+
+ public:
+ MemSlot& operator[](size_t aIndex) const {
+ if (aIndex < kGroupSize * kGroups) {
+ return mSlots[aIndex / kGroupSize][aIndex % kGroupSize];
+ }
+ aIndex -= kGroupSize * kGroups;
+ return mNext[0][aIndex];
+ }
+
+ // Ask if any of the memory-mapped buffers use this range.
+ bool ownsMapping(uintptr_t aStart) const {
+ for (const auto& slot : mSlots) {
+ if (slot.allocated() && slot.ownsMapping(aStart)) {
+ return true;
+ }
+ }
+ return mNext.ownsMapping(aStart) ||
+ (mNext.allocated() && mNext[0].ownsMapping(aStart));
+ }
+};
+
+/* Helper class for memory buffers */
+class Buffer {
+ public:
+ Buffer() : mBuf(nullptr), mLength(0) {}
+
+ Buffer(const void* aBuf, size_t aLength)
+ : mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {}
+
+ /* Constructor for string literals. */
+ template <size_t Size>
+ explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {}
+
+ /* Returns a sub-buffer up-to but not including the given aNeedle character.
+ * The "parent" buffer itself is altered to begin after the aNeedle
+ * character.
+ * If the aNeedle character is not found, return the entire buffer, and empty
+ * the "parent" buffer. */
+ Buffer SplitChar(char aNeedle) {
+ char* buf = const_cast<char*>(mBuf);
+ char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength));
+ if (!c) {
+ return Split(mLength);
+ }
+
+ Buffer result = Split(c - buf);
+ // Remove the aNeedle character itself.
+ Split(1);
+ return result;
+ }
+
+ // Advance to the position after aNeedle. This is like SplitChar but does not
+ // return the skipped portion.
+ void Skip(char aNeedle, unsigned nTimes = 1) {
+ for (unsigned i = 0; i < nTimes; i++) {
+ SplitChar(aNeedle);
+ }
+ }
+
+ void SkipWhitespace() {
+ while (mLength > 0) {
+ if (!IsSpace(mBuf[0])) {
+ break;
+ }
+ mBuf++;
+ mLength--;
+ }
+ }
+
+ static bool IsSpace(char c) {
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\v':
+ case '\f':
+ case '\r':
+ return true;
+ }
+ return false;
+ }
+
+ /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is
+ * amputated of those aLength characters. If the "parent" buffer is smaller
+ * than aLength, then its length is used instead. */
+ Buffer Split(size_t aLength) {
+ Buffer result(mBuf, std::min(aLength, mLength));
+ mLength -= result.mLength;
+ mBuf += result.mLength;
+ return result;
+ }
+
+ /* Move the buffer (including its content) to the memory address of the aOther
+ * buffer. */
+ void Slide(Buffer aOther) {
+ memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength);
+ mBuf = aOther.mBuf;
+ }
+
+ /* Returns whether the two involved buffers have the same content. */
+ bool operator==(Buffer aOther) {
+ return mLength == aOther.mLength &&
+ (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength));
+ }
+
+ bool operator!=(Buffer aOther) { return !(*this == aOther); }
+
+ /* Returns true if the buffer is not empty. */
+ explicit operator bool() { return mLength; }
+
+ char operator[](size_t n) const { return mBuf[n]; }
+
+ /* Returns the memory location of the buffer. */
+ const char* get() { return mBuf; }
+
+ /* Returns the memory location of the end of the buffer (technically, the
+ * first byte after the buffer). */
+ const char* GetEnd() { return mBuf + mLength; }
+
+ /* Extend the buffer over the content of the other buffer, assuming it is
+ * adjacent. */
+ void Extend(Buffer aOther) {
+ MOZ_ASSERT(aOther.mBuf == GetEnd());
+ mLength += aOther.mLength;
+ }
+
+ size_t Length() const { return mLength; }
+
+ private:
+ const char* mBuf;
+ size_t mLength;
+};
+
+/* Helper class to read from a file descriptor line by line. */
+class FdReader {
+ public:
+ explicit FdReader(int aFd, bool aNeedClose = false)
+ : mFd(aFd),
+ mNeedClose(aNeedClose),
+ mData(&mRawBuf, 0),
+ mBuf(&mRawBuf, sizeof(mRawBuf)) {}
+
+ FdReader(FdReader&& aOther) noexcept
+ : mFd(aOther.mFd),
+ mNeedClose(aOther.mNeedClose),
+ mData(&mRawBuf, 0),
+ mBuf(&mRawBuf, sizeof(mRawBuf)) {
+ memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf));
+ aOther.mFd = -1;
+ aOther.mNeedClose = false;
+ aOther.mData = Buffer();
+ aOther.mBuf = Buffer();
+ }
+
+ FdReader& operator=(const FdReader&) = delete;
+ FdReader(const FdReader&) = delete;
+
+ ~FdReader() {
+ if (mNeedClose) {
+ close(mFd);
+ }
+ }
+
+ /* Read a line from the file descriptor and returns it as a Buffer instance */
+ Buffer ReadLine() {
+ while (true) {
+ Buffer result = mData.SplitChar('\n');
+
+ /* There are essentially three different cases here:
+ * - '\n' was found "early". In this case, the end of the result buffer
+ * is before the beginning of the mData buffer (since SplitChar
+ * amputated it).
+ * - '\n' was found as the last character of mData. In this case, mData
+ * is empty, but still points at the end of mBuf. result points to what
+ * used to be in mData, without the last character.
+ * - '\n' was not found. In this case too, mData is empty and points at
+ * the end of mBuf. But result points to the entire buffer that used to
+ * be pointed by mData.
+ * Only in the latter case do both result and mData's end match, and it's
+ * the only case where we need to refill the buffer.
+ */
+ if (result.GetEnd() != mData.GetEnd()) {
+ return result;
+ }
+
+ /* Since SplitChar emptied mData, make it point to what it had before. */
+ mData = result;
+
+ /* And move it to the beginning of the read buffer. */
+ mData.Slide(mBuf);
+
+ FillBuffer();
+
+ if (!mData) {
+ return Buffer();
+ }
+ }
+ }
+
+ private:
+ /* Fill the read buffer. */
+ void FillBuffer() {
+ size_t size = mBuf.GetEnd() - mData.GetEnd();
+ Buffer remainder(mData.GetEnd(), size);
+
+ ssize_t len = 1;
+ while (remainder && len > 0) {
+ len = ::read(mFd, const_cast<char*>(remainder.get()), size);
+ if (len < 0) {
+ die("Read error");
+ }
+ size -= len;
+ mData.Extend(remainder.Split(len));
+ }
+ }
+
+ /* File descriptor to read from. */
+ int mFd;
+ bool mNeedClose;
+
+ /* Part of data that was read from the file descriptor but not returned with
+ * ReadLine yet. */
+ Buffer mData;
+ /* Buffer representation of mRawBuf */
+ Buffer mBuf;
+ /* read() buffer */
+ char mRawBuf[4096];
+};
+
+MOZ_BEGIN_EXTERN_C
+
+/* Function declarations for all the replace_malloc _impl functions.
+ * See memory/build/replace_malloc.c */
+#define MALLOC_DECL(name, return_type, ...) \
+ return_type name##_impl(__VA_ARGS__);
+#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
+#include "malloc_decls.h"
+
+#define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__);
+#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
+#include "malloc_decls.h"
+
+#ifdef ANDROID
+
+/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have.
+ * While gecko has one in libmozglue, the replay program can't use that.
+ * Since we're not going to fork anyways, make it a dummy function. */
+int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void),
+ void (*aChild)(void)) {
+ return 0;
+}
+#endif
+
+MOZ_END_EXTERN_C
+
+template <unsigned Base = 10>
+size_t parseNumber(Buffer aBuf) {
+ if (!aBuf) {
+ die("Malformed input");
+ }
+
+ size_t result = 0;
+ for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) {
+ result *= Base;
+ if ((*c >= '0' && *c <= '9')) {
+ result += *c - '0';
+ } else if (Base == 16 && *c >= 'a' && *c <= 'f') {
+ result += *c - 'a' + 10;
+ } else if (Base == 16 && *c >= 'A' && *c <= 'F') {
+ result += *c - 'A' + 10;
+ } else {
+ die("Malformed input");
+ }
+ }
+ return result;
+}
+
+static size_t percent(size_t a, size_t b) {
+ if (!b) {
+ return 0;
+ }
+ return size_t(round(double(a) / double(b) * 100.0));
+}
+
+class Distribution {
+ public:
+ // Default constructor used for array initialisation.
+ Distribution()
+ : mMaxSize(0),
+ mNextSmallest(0),
+ mShift(0),
+ mArrayOffset(0),
+ mArraySlots(0),
+ mTotalRequests(0),
+ mRequests{0} {}
+
+ Distribution(size_t max_size, size_t next_smallest, size_t bucket_size)
+ : mMaxSize(max_size),
+ mNextSmallest(next_smallest),
+ mShift(CeilingLog2(bucket_size)),
+ mArrayOffset(1 + next_smallest),
+ mArraySlots((max_size - next_smallest) >> mShift),
+ mTotalRequests(0),
+ mRequests{
+ 0,
+ } {
+ MOZ_ASSERT(mMaxSize);
+ MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS);
+ }
+
+ Distribution& operator=(const Distribution& aOther) = default;
+
+ void addRequest(size_t request) {
+ MOZ_ASSERT(mMaxSize);
+
+ mRequests[(request - mArrayOffset) >> mShift]++;
+ mTotalRequests++;
+ }
+
+ void printDist(intptr_t std_err) {
+ MOZ_ASSERT(mMaxSize);
+
+ // The translation to turn a slot index into a memory request size.
+ const size_t array_offset_add = (1 << mShift) + mNextSmallest;
+
+ FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize);
+ FdPrintf(std_err, " request : count percent\n");
+ size_t range_start = mNextSmallest + 1;
+ for (size_t j = 0; j < mArraySlots; j++) {
+ size_t range_end = (j << mShift) + array_offset_add;
+ FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end,
+ mRequests[j], percent(mRequests[j], mTotalRequests));
+ range_start = range_end + 1;
+ }
+ }
+
+ size_t maxSize() const { return mMaxSize; }
+
+ private:
+ static constexpr size_t MAX_NUM_BUCKETS = 16;
+
+ // If size is zero this distribution is uninitialised.
+ size_t mMaxSize;
+ size_t mNextSmallest;
+
+ // Parameters to convert a size into a slot number.
+ unsigned mShift;
+ unsigned mArrayOffset;
+
+ // The number of slots.
+ unsigned mArraySlots;
+
+ size_t mTotalRequests;
+ size_t mRequests[MAX_NUM_BUCKETS];
+};
+
+#ifdef XP_LINUX
+struct MemoryMap {
+ uintptr_t mStart;
+ uintptr_t mEnd;
+ bool mReadable;
+ bool mPrivate;
+ bool mAnon;
+ bool mIsStack;
+ bool mIsSpecial;
+ size_t mRSS;
+
+ bool IsCandidate() const {
+ // Candidates mappings are:
+ // * anonymous
+ // * they are private (not shared),
+ // * anonymous or "[heap]" (not another area such as stack),
+ //
+ // The only mappings we're falsely including are the .bss segments for
+ // shared libraries.
+ return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial;
+ }
+};
+
+class SMapsReader : private FdReader {
+ private:
+ explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {}
+
+ public:
+ static Maybe<SMapsReader> open() {
+ int fd = ::open(FILENAME, O_RDONLY);
+ if (fd < 0) {
+ perror(FILENAME);
+ return mozilla::Nothing();
+ }
+
+ return Some(SMapsReader(FdReader(fd, true)));
+ }
+
+ Maybe<MemoryMap> readMap(intptr_t aStdErr) {
+ // This is not very tolerant of format changes because things like
+ // parseNumber will crash if they get a bad value. TODO: make this
+ // soft-fail.
+
+ Buffer line = ReadLine();
+ if (!line) {
+ return Nothing();
+ }
+
+ // We're going to be at the start of an entry, start tokenising the first
+ // line.
+
+ // Range
+ Buffer range = line.SplitChar(' ');
+ uintptr_t range_start = parseNumber<16>(range.SplitChar('-'));
+ uintptr_t range_end = parseNumber<16>(range);
+
+ // Mode.
+ Buffer mode = line.SplitChar(' ');
+ if (mode.Length() != 4) {
+ FdPrintf(aStdErr, "Couldn't parse SMAPS file\n");
+ return Nothing();
+ }
+ bool readable = mode[0] == 'r';
+ bool private_ = mode[3] == 'p';
+
+ // Offset, device and inode.
+ line.SkipWhitespace();
+ bool zero_offset = !parseNumber<16>(line.SplitChar(' '));
+ line.SkipWhitespace();
+ bool no_device = line.SplitChar(' ') == Buffer("00:00");
+ line.SkipWhitespace();
+ bool zero_inode = !parseNumber(line.SplitChar(' '));
+ bool is_anon = zero_offset && no_device && zero_inode;
+
+ // Filename, or empty for anon mappings.
+ line.SkipWhitespace();
+ Buffer filename = line.SplitChar(' ');
+
+ bool is_stack;
+ bool is_special;
+ if (filename && filename[0] == '[') {
+ is_stack = filename == Buffer("[stack]");
+ is_special = filename == Buffer("[vdso]") ||
+ filename == Buffer("[vvar]") ||
+ filename == Buffer("[vsyscall]");
+ } else {
+ is_stack = false;
+ is_special = false;
+ }
+
+ size_t rss = 0;
+ while ((line = ReadLine())) {
+ Buffer field = line.SplitChar(':');
+ if (field == Buffer("VmFlags")) {
+ // This is the last field, at least in the current format. Break this
+ // loop to read the next mapping.
+ break;
+ }
+
+ if (field == Buffer("Rss")) {
+ line.SkipWhitespace();
+ Buffer value = line.SplitChar(' ');
+ rss = parseNumber(value) * 1024;
+ }
+ }
+
+ return Some(MemoryMap({range_start, range_end, readable, private_, is_anon,
+ is_stack, is_special, rss}));
+ }
+
+ static constexpr char FILENAME[] = "/proc/self/smaps";
+};
+#endif // XP_LINUX
+
+/* Class to handle dispatching the replay function calls to replace-malloc. */
+class Replay {
+ public:
+ Replay() {
+#ifdef _WIN32
+ // See comment in FdPrintf.h as to why native win32 handles are used.
+ mStdErr = reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE));
+#else
+ mStdErr = fileno(stderr);
+#endif
+#ifdef XP_LINUX
+ BuildInitialMapInfo();
+#endif
+ }
+
+ void enableSlopCalculation() { mCalculateSlop = true; }
+ void enableMemset() { mDoMemset = true; }
+
+ MemSlot& operator[](size_t index) const { return mSlots[index]; }
+
+ void malloc(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ size_t size = parseNumber(aArgs);
+ aSlot.mPtr = ::malloc_impl(size);
+ if (aSlot.mPtr) {
+ aSlot.mRequest = size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ }
+ }
+
+ void posix_memalign(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ size_t alignment = parseNumber(aArgs.SplitChar(','));
+ size_t size = parseNumber(aArgs);
+ void* ptr;
+ if (::posix_memalign_impl(&ptr, alignment, size) == 0) {
+ aSlot.mPtr = ptr;
+ aSlot.mRequest = size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ } else {
+ aSlot.mPtr = nullptr;
+ }
+ }
+
+ void aligned_alloc(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ size_t alignment = parseNumber(aArgs.SplitChar(','));
+ size_t size = parseNumber(aArgs);
+ aSlot.mPtr = ::aligned_alloc_impl(alignment, size);
+ if (aSlot.mPtr) {
+ aSlot.mRequest = size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ }
+ }
+
+ void calloc(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ size_t num = parseNumber(aArgs.SplitChar(','));
+ size_t size = parseNumber(aArgs);
+ aSlot.mPtr = ::calloc_impl(num, size);
+ if (aSlot.mPtr) {
+ aSlot.mRequest = num * size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += num * size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ }
+ }
+
+ void realloc(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ Buffer dummy = aArgs.SplitChar('#');
+ if (dummy) {
+ die("Malformed input");
+ }
+ size_t slot_id = parseNumber(aArgs.SplitChar(','));
+ size_t size = parseNumber(aArgs);
+ MemSlot& old_slot = (*this)[slot_id];
+ void* old_ptr = old_slot.mPtr;
+ old_slot.mPtr = nullptr;
+ aSlot.mPtr = ::realloc_impl(old_ptr, size);
+ if (aSlot.mPtr) {
+ aSlot.mRequest = size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ }
+ }
+
+ void free(Buffer& aArgs, Buffer& aResult) {
+ if (aResult) {
+ die("Malformed input");
+ }
+ mOps++;
+ Buffer dummy = aArgs.SplitChar('#');
+ if (dummy) {
+ die("Malformed input");
+ }
+ size_t slot_id = parseNumber(aArgs);
+ MemSlot& slot = (*this)[slot_id];
+ ::free_impl(slot.mPtr);
+ slot.mPtr = nullptr;
+ }
+
+ void memalign(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ size_t alignment = parseNumber(aArgs.SplitChar(','));
+ size_t size = parseNumber(aArgs);
+ aSlot.mPtr = ::memalign_impl(alignment, size);
+ if (aSlot.mPtr) {
+ aSlot.mRequest = size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ }
+ }
+
+ void valloc(Buffer& aArgs, Buffer& aResult) {
+ MemSlot& aSlot = SlotForResult(aResult);
+ mOps++;
+ size_t size = parseNumber(aArgs);
+ aSlot.mPtr = ::valloc_impl(size);
+ if (aSlot.mPtr) {
+ aSlot.mRequest = size;
+ MaybeCommit(aSlot);
+ if (mCalculateSlop) {
+ mTotalRequestedSize += size;
+ mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
+ }
+ }
+ }
+
+ void jemalloc_stats(Buffer& aArgs, Buffer& aResult) {
+ if (aArgs || aResult) {
+ die("Malformed input");
+ }
+ mOps++;
+ jemalloc_stats_t stats;
+ // Using a variable length array here is a GCC & Clang extension. But it
+ // allows us to place this on the stack and not alter jemalloc's profiling.
+ const size_t num_bins = ::jemalloc_stats_num_bins();
+ const size_t MAX_NUM_BINS = 100;
+ if (num_bins > MAX_NUM_BINS) {
+ die("Exceeded maximum number of jemalloc stats bins");
+ }
+ jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}};
+ ::jemalloc_stats_internal(&stats, bin_stats);
+
+#ifdef XP_LINUX
+ size_t rss = get_rss();
+#endif
+
+ size_t num_objects = 0;
+ size_t num_sloppy_objects = 0;
+ size_t total_allocated = 0;
+ size_t total_slop = 0;
+ size_t large_slop = 0;
+ size_t large_used = 0;
+ size_t huge_slop = 0;
+ size_t huge_used = 0;
+ size_t bin_slop[MAX_NUM_BINS] = {0};
+
+ for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
+ MemSlot& slot = mSlots[slot_id];
+ if (slot.mPtr) {
+ size_t used = ::malloc_usable_size_impl(slot.mPtr);
+ size_t slop = used - slot.mRequest;
+ total_allocated += used;
+ total_slop += slop;
+ num_objects++;
+ if (slop) {
+ num_sloppy_objects++;
+ }
+
+ if (used <=
+ (stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) {
+ // We know that this is an inefficient linear search, but there's a
+ // small number of bins and this is simple.
+ for (unsigned i = 0; i < num_bins; i++) {
+ auto& bin = bin_stats[i];
+ if (used == bin.size) {
+ bin_slop[i] += slop;
+ break;
+ }
+ }
+ } else if (used <= stats.large_max) {
+ large_slop += slop;
+ large_used += used;
+ } else {
+ huge_slop += slop;
+ huge_used += used;
+ }
+ }
+ }
+
+ // This formula corresponds to the calculation of wasted (from committed and
+ // the other parameters) within jemalloc_stats()
+ size_t committed = stats.allocated + stats.waste + stats.page_cache +
+ stats.bookkeeping + stats.bin_unused;
+
+ FdPrintf(mStdErr, "\n");
+ FdPrintf(mStdErr, "Objects: %9zu\n", num_objects);
+ FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots);
+ FdPrintf(mStdErr, "Ops: %9zu\n", mOps);
+ FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped);
+ FdPrintf(mStdErr, "committed: %9zu\n", committed);
+#ifdef XP_LINUX
+ if (rss) {
+ FdPrintf(mStdErr, "rss: %9zu\n", rss);
+ }
+#endif
+ FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated);
+ FdPrintf(mStdErr, "waste: %9zu\n", stats.waste);
+ FdPrintf(mStdErr, "dirty: %9zu\n", stats.page_cache);
+ FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping);
+ FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused);
+ FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max);
+ FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max);
+ FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.subpage_max);
+ FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max);
+ if (mCalculateSlop) {
+ size_t slop = mTotalAllocatedSize - mTotalRequestedSize;
+ FdPrintf(mStdErr,
+ "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n",
+ slop / 1024, mTotalAllocatedSize / 1024,
+ percent(slop, mTotalAllocatedSize));
+ }
+ FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n",
+ num_sloppy_objects, num_objects,
+ percent(num_sloppy_objects, num_objects));
+ FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n",
+ total_slop / 1024, total_allocated / 1024,
+ percent(total_slop, total_allocated));
+
+ FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size",
+ "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)",
+ "used (r)");
+ for (unsigned i = 0; i < num_bins; i++) {
+ auto& bin = bin_stats[i];
+ MOZ_ASSERT(bin.size);
+ FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n",
+ bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024,
+ percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total),
+ bin.num_non_full_runs, bin.num_runs,
+ percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs));
+ }
+
+ FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent");
+ for (unsigned i = 0; i < num_bins; i++) {
+ auto& bin = bin_stats[i];
+ size_t used = bin.bytes_total - bin.bytes_unused;
+ FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], used,
+ percent(bin_slop[i], used));
+ }
+ FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used,
+ percent(large_slop, large_used));
+ FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used,
+ percent(huge_slop, huge_used));
+
+ print_distributions(stats, bin_stats);
+ }
+
+ private:
+ /*
+ * Create and print frequency distributions of memory requests.
+ */
+ void print_distributions(jemalloc_stats_t& stats,
+ jemalloc_bin_stats_t* bin_stats) {
+ const size_t num_bins = ::jemalloc_stats_num_bins();
+
+ // We compute distributions for all of the bins for small allocations
+ // (num_bins) plus two more distributions for larger allocations.
+ Distribution dists[num_bins + 2];
+
+ unsigned last_size = 0;
+ unsigned num_dists = 0;
+ for (unsigned i = 0; i < num_bins; i++) {
+ auto& bin = bin_stats[i];
+ auto& dist = dists[num_dists++];
+
+ MOZ_ASSERT(bin.size);
+ if (bin.size <= 16) {
+ // 1 byte buckets.
+ dist = Distribution(bin.size, last_size, 1);
+ } else if (bin.size <= stats.quantum_max) {
+ // 4 buckets, (4 bytes per bucket with a 16 byte quantum).
+ dist = Distribution(bin.size, last_size, stats.quantum / 4);
+ } else if (bin.size <= stats.quantum_wide_max) {
+ // 8 buckets, (32 bytes per bucket with a 256 byte quantum-wide).
+ dist = Distribution(bin.size, last_size, stats.quantum_wide / 8);
+ } else {
+ // 16 buckets.
+ dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16);
+ }
+ last_size = bin.size;
+ }
+
+ // 16 buckets.
+ dists[num_dists] = Distribution(stats.page_size, last_size,
+ (stats.page_size - last_size) / 16);
+ num_dists++;
+
+ // Buckets are 1/4 of the page size (12 buckets).
+ dists[num_dists] =
+ Distribution(stats.page_size * 4, stats.page_size, stats.page_size / 4);
+ num_dists++;
+
+ MOZ_RELEASE_ASSERT(num_dists <= num_bins + 2);
+
+ for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
+ MemSlot& slot = mSlots[slot_id];
+ if (slot.mPtr) {
+ for (size_t i = 0; i < num_dists; i++) {
+ if (slot.mRequest <= dists[i].maxSize()) {
+ dists[i].addRequest(slot.mRequest);
+ break;
+ }
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < num_dists; i++) {
+ dists[i].printDist(mStdErr);
+ }
+ }
+
+#ifdef XP_LINUX
+ size_t get_rss() {
+ if (mGetRSSFailed) {
+ return 0;
+ }
+
+ // On Linux we can determine the RSS of the heap area by examining the
+ // smaps file.
+ mozilla::Maybe<SMapsReader> reader = SMapsReader::open();
+ if (!reader) {
+ mGetRSSFailed = true;
+ return 0;
+ }
+
+ size_t rss = 0;
+ while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) {
+ if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) &&
+ !InitialMapsContains(map->mStart)) {
+ rss += map->mRSS;
+ }
+ }
+
+ return rss;
+ }
+
+ bool InitialMapsContains(uintptr_t aRangeStart) {
+ for (unsigned i = 0; i < mNumInitialMaps; i++) {
+ MOZ_ASSERT(i < MAX_INITIAL_MAPS);
+
+ if (mInitialMaps[i] == aRangeStart) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public:
+ void BuildInitialMapInfo() {
+ if (mGetRSSFailed) {
+ return;
+ }
+
+ Maybe<SMapsReader> reader = SMapsReader::open();
+ if (!reader) {
+ mGetRSSFailed = true;
+ return;
+ }
+
+ while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) {
+ if (map->IsCandidate()) {
+ if (mNumInitialMaps >= MAX_INITIAL_MAPS) {
+ FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n");
+ mGetRSSFailed = false;
+ return;
+ }
+
+ mInitialMaps[mNumInitialMaps++] = map->mStart;
+ }
+ }
+ }
+#endif
+
+ private:
+ MemSlot& SlotForResult(Buffer& aResult) {
+ /* Parse result value and get the corresponding slot. */
+ Buffer dummy = aResult.SplitChar('=');
+ Buffer dummy2 = aResult.SplitChar('#');
+ if (dummy || dummy2) {
+ die("Malformed input");
+ }
+
+ size_t slot_id = parseNumber(aResult);
+ mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1);
+
+ return mSlots[slot_id];
+ }
+
+ void MaybeCommit(MemSlot& aSlot) {
+ if (mDoMemset) {
+ // Write any byte, 0x55 isn't significant.
+ memset(aSlot.mPtr, 0x55, aSlot.mRequest);
+ }
+ }
+
+ intptr_t mStdErr;
+ size_t mOps = 0;
+
+ // The number of slots that have been used. It is used to iterate over slots
+ // without accessing those we haven't initialised.
+ size_t mNumUsedSlots = 0;
+
+ MemSlotList mSlots;
+ size_t mTotalRequestedSize = 0;
+ size_t mTotalAllocatedSize = 0;
+ // Whether to calculate slop for all allocations over the runtime of a
+ // process.
+ bool mCalculateSlop = false;
+ bool mDoMemset = false;
+
+#ifdef XP_LINUX
+ // If we have a failure reading smaps info then this is used to disable that
+ // feature.
+ bool mGetRSSFailed = false;
+
+ // The initial memory mappings are recorded here at start up. We exclude
+ // memory in these mappings when computing RSS. We assume they do not grow
+ // and that no regions are allocated near them, this is true because they'll
+ // only record the .bss and .data segments from our binary and shared objects
+ // or regions that logalloc-replay has created for MappedArrays.
+ //
+ // 64 should be enough for anybody.
+ static constexpr unsigned MAX_INITIAL_MAPS = 64;
+ uintptr_t mInitialMaps[MAX_INITIAL_MAPS];
+ unsigned mNumInitialMaps = 0;
+#endif // XP_LINUX
+};
+
+static Replay replay;
+
+int main(int argc, const char* argv[]) {
+ size_t first_pid = 0;
+ FdReader reader(0);
+
+ for (int i = 1; i < argc; i++) {
+ const char* option = argv[i];
+ if (strcmp(option, "-s") == 0) {
+ // Do accounting to calculate allocation slop.
+ replay.enableSlopCalculation();
+ } else if (strcmp(option, "-c") == 0) {
+ // Touch memory as we allocate it.
+ replay.enableMemset();
+ } else {
+ fprintf(stderr, "Unknown command line option: %s\n", option);
+ return EXIT_FAILURE;
+ }
+ }
+
+ /* Read log from stdin and dispatch function calls to the Replay instance.
+ * The log format is essentially:
+ * <pid> <tid> <function>([<args>])[=<result>]
+ * <args> is a comma separated list of arguments.
+ *
+ * The logs are expected to be preprocessed so that allocations are
+ * attributed a tracking slot. The input is trusted not to have crazy
+ * values for these slot numbers.
+ *
+ * <result>, as well as some of the args to some of the function calls are
+ * such slot numbers.
+ */
+ while (true) {
+ Buffer line = reader.ReadLine();
+
+ if (!line) {
+ break;
+ }
+
+ size_t pid = parseNumber(line.SplitChar(' '));
+ if (!first_pid) {
+ first_pid = pid;
+ }
+
+ /* The log may contain data for several processes, only entries for the
+ * very first that appears are treated. */
+ if (first_pid != pid) {
+ continue;
+ }
+
+ /* The log contains thread ids for manual analysis, but we just ignore them
+ * for now. */
+ parseNumber(line.SplitChar(' '));
+
+ Buffer func = line.SplitChar('(');
+ Buffer args = line.SplitChar(')');
+
+ if (func == Buffer("jemalloc_stats")) {
+ replay.jemalloc_stats(args, line);
+ } else if (func == Buffer("free")) {
+ replay.free(args, line);
+ } else if (func == Buffer("malloc")) {
+ replay.malloc(args, line);
+ } else if (func == Buffer("posix_memalign")) {
+ replay.posix_memalign(args, line);
+ } else if (func == Buffer("aligned_alloc")) {
+ replay.aligned_alloc(args, line);
+ } else if (func == Buffer("calloc")) {
+ replay.calloc(args, line);
+ } else if (func == Buffer("realloc")) {
+ replay.realloc(args, line);
+ } else if (func == Buffer("memalign")) {
+ replay.memalign(args, line);
+ } else if (func == Buffer("valloc")) {
+ replay.valloc(args, line);
+ } else {
+ die("Malformed input");
+ }
+ }
+
+ return 0;
+}
diff --git a/memory/replace/logalloc/replay/expected_output_minimal.log b/memory/replace/logalloc/replay/expected_output_minimal.log
new file mode 100644
index 0000000000..332fe20957
--- /dev/null
+++ b/memory/replace/logalloc/replay/expected_output_minimal.log
@@ -0,0 +1,17 @@
+1 1 jemalloc_stats()
+1 1 malloc(42)=#1
+1 1 malloc(24)=#2
+1 1 free(#1)
+1 1 memalign(4096,1024)=#1
+1 1 calloc(4,42)=#3
+1 1 free(#2)
+1 1 realloc(#3,84)=#2
+1 1 memalign(256,1024)=#3
+1 1 memalign(512,1024)=#4
+1 1 memalign(4096,1024)=#5
+1 1 jemalloc_stats()
+1 1 free(#5)
+1 1 free(#4)
+1 1 free(#3)
+1 1 free(#2)
+1 1 free(#1)
diff --git a/memory/replace/logalloc/replay/logalloc_munge.py b/memory/replace/logalloc/replay/logalloc_munge.py
new file mode 100644
index 0000000000..52d0032463
--- /dev/null
+++ b/memory/replace/logalloc/replay/logalloc_munge.py
@@ -0,0 +1,147 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+This script takes a log from the replace-malloc logalloc library on stdin
+and munges it so that it can be used with the logalloc-replay tool.
+
+Given the following output:
+ 13663 malloc(42)=0x7f0c33502040
+ 13663 malloc(24)=0x7f0c33503040
+ 13663 free(0x7f0c33502040)
+The resulting output is:
+ 1 malloc(42)=#1
+ 1 malloc(24)=#2
+ 1 free(#1)
+
+See README for more details.
+"""
+
+import sys
+from collections import defaultdict, deque
+
+
+class IdMapping(object):
+ """Class to map values to ids.
+
+ Each value is associated to an increasing id, starting from 1.
+ When a value is removed, its id is recycled and will be reused for
+ subsequent values.
+ """
+
+ def __init__(self):
+ self.id = 1
+ self._values = {}
+ self._recycle = deque()
+
+ def __getitem__(self, value):
+ if value not in self._values:
+ if self._recycle:
+ self._values[value] = self._recycle.popleft()
+ else:
+ self._values[value] = self.id
+ self.id += 1
+ return self._values[value]
+
+ def __delitem__(self, value):
+ if value == 0:
+ return
+ self._recycle.append(self._values[value])
+ del self._values[value]
+
+ def __contains__(self, value):
+ return value == 0 or value in self._values
+
+
+class Ignored(Exception):
+ pass
+
+
+def split_log_line(line):
+ try:
+ # The format for each line is:
+ # <pid> [<tid>] <function>([<args>])[=<result>]
+ #
+ # The original format didn't include the tid, so we try to parse
+ # lines whether they have one or not.
+ pid, func_call = line.split(" ", 1)
+ call, result = func_call.split(")")
+ func, args = call.split("(")
+ args = args.split(",") if args else []
+ if result:
+ if result[0] != "=":
+ raise Ignored("Malformed input")
+ result = result[1:]
+ if " " in func:
+ tid, func = func.split(" ", 1)
+ else:
+ tid = pid
+ return pid, tid, func, args, result
+ except Exception:
+ raise Ignored("Malformed input")
+
+
+NUM_ARGUMENTS = {
+ "jemalloc_stats": 0,
+ "free": 1,
+ "malloc": 1,
+ "posix_memalign": 2,
+ "aligned_alloc": 2,
+ "calloc": 2,
+ "realloc": 2,
+ "memalign": 2,
+ "valloc": 1,
+}
+
+
+def main():
+ pids = IdMapping()
+ processes = defaultdict(lambda: {"pointers": IdMapping(), "tids": IdMapping()})
+ for line in sys.stdin:
+ line = line.strip()
+
+ try:
+ pid, tid, func, args, result = split_log_line(line)
+
+ # Replace pid with an id.
+ pid = pids[int(pid)]
+
+ process = processes[pid]
+ tid = process["tids"][int(tid)]
+
+ pointers = process["pointers"]
+
+ if func not in NUM_ARGUMENTS:
+ raise Ignored("Unknown function")
+
+ if len(args) != NUM_ARGUMENTS[func]:
+ raise Ignored("Malformed input")
+
+ if func in ("jemalloc_stats", "free") and result:
+ raise Ignored("Malformed input")
+
+ if func in ("free", "realloc"):
+ ptr = int(args[0], 16)
+ if ptr and ptr not in pointers:
+ raise Ignored("Did not see an alloc for pointer")
+ args[0] = "#%d" % pointers[ptr]
+ del pointers[ptr]
+
+ if result:
+ result = int(result, 16)
+ if not result:
+ raise Ignored("Result is NULL")
+ result = "#%d" % pointers[result]
+
+ print(
+ "%d %d %s(%s)%s"
+ % (pid, tid, func, ",".join(args), "=%s" % result if result else "")
+ )
+
+ except Exception as e:
+ print('Ignored "%s": %s' % (line, e), file=sys.stderr)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/memory/replace/logalloc/replay/moz.build b/memory/replace/logalloc/replay/moz.build
new file mode 100644
index 0000000000..4ebf48f842
--- /dev/null
+++ b/memory/replace/logalloc/replay/moz.build
@@ -0,0 +1,87 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Program("logalloc-replay")
+
+SOURCES += [
+ "/mfbt/Assertions.cpp",
+ "/mfbt/Poison.cpp",
+ "/mfbt/RandomNum.cpp",
+ "/mfbt/TaggedAnonymousMemory.cpp",
+ "/mfbt/Unused.cpp",
+ "/mozglue/misc/StackWalk.cpp",
+ "Replay.cpp",
+]
+
+if CONFIG["OS_TARGET"] == "WINNT":
+ SOURCES += [
+ "/mozglue/misc/ProcessType.cpp",
+ ]
+
+if CONFIG["OS_TARGET"] == "Linux":
+ LDFLAGS += ["-static-libstdc++"]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+ # Work around "warning: 'aligned_alloc' is only available on macOS 10.15 or newer"
+ # when building with MACOSX_DEPLOYMENT_TARGET < 10.15 with >= 10.15 SDK.
+ # We have our own definition of the function, so it doesn't matter what the SDK says.
+ SOURCES["Replay.cpp"].flags += ["-Wno-unguarded-availability-new"]
+
+if CONFIG["MOZ_REPLACE_MALLOC_STATIC"] and (CONFIG["MOZ_DMD"] or CONFIG["MOZ_PHC"]):
+ UNIFIED_SOURCES += [
+ "/mfbt/HashFunctions.cpp",
+ "/mfbt/JSONWriter.cpp",
+ ]
+
+if CONFIG["OS_ARCH"] == "WINNT":
+ OS_LIBS += [
+ "advapi32",
+ "dbghelp",
+ ]
+
+if CONFIG["MOZ_LINKER"] and CONFIG["MOZ_WIDGET_TOOLKIT"] == "android":
+ LOCAL_INCLUDES += [
+ "/mozglue/linker",
+ ]
+ DEFINES["__wrap_dladdr"] = "dladdr"
+
+
+if CONFIG["MOZ_BUILD_APP"] == "memory":
+ EXPORTS.mozilla += [
+ "/mozglue/misc/StackWalk.h",
+ ]
+
+if CONFIG["MOZ_BUILD_APP"] == "memory" or CONFIG["MOZ_REPLACE_MALLOC_STATIC"]:
+ UNIFIED_SOURCES += [
+ "/mfbt/double-conversion/double-conversion/bignum-dtoa.cc",
+ "/mfbt/double-conversion/double-conversion/bignum.cc",
+ "/mfbt/double-conversion/double-conversion/cached-powers.cc",
+ "/mfbt/double-conversion/double-conversion/double-to-string.cc",
+ "/mfbt/double-conversion/double-conversion/fast-dtoa.cc",
+ "/mfbt/double-conversion/double-conversion/fixed-dtoa.cc",
+ "/mfbt/double-conversion/double-conversion/string-to-double.cc",
+ "/mfbt/double-conversion/double-conversion/strtod.cc",
+ "/mozglue/misc/Printf.cpp",
+ ]
+
+LOCAL_INCLUDES += [
+ "/memory/build",
+]
+
+# Link replace-malloc and the default allocator.
+USE_LIBS += [
+ "memory",
+]
+
+# The memory library defines this, so it's needed here too.
+DEFINES["IMPL_MFBT"] = True
+
+if CONFIG["MOZ_NEEDS_LIBATOMIC"]:
+ OS_LIBS += ["atomic"]
+
+DisableStlWrapping()
+
+include("/mozglue/build/replace_malloc.mozbuild")
diff --git a/memory/replace/logalloc/replay/replay.log b/memory/replace/logalloc/replay/replay.log
new file mode 100644
index 0000000000..f1e6de788b
--- /dev/null
+++ b/memory/replace/logalloc/replay/replay.log
@@ -0,0 +1,18 @@
+1 1 jemalloc_stats()
+1 1 malloc(42)=#1
+1 1 malloc(24)=#2
+2 2 malloc(42)=#1
+1 1 free(#1)
+1 1 posix_memalign(4096,1024)=#1
+1 1 calloc(4,42)=#3
+1 1 free(#2)
+1 1 realloc(#3,84)=#2
+1 1 aligned_alloc(256,1024)=#3
+1 1 memalign(512,1024)=#4
+1 1 valloc(1024)=#5
+1 1 jemalloc_stats()
+1 1 free(#5)
+1 1 free(#4)
+1 1 free(#3)
+1 1 free(#2)
+1 1 free(#1)