/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #define MOZ_MEMORY_IMPL #include "mozmemory_wrap.h" #ifdef _WIN32 # include # include typedef intptr_t ssize_t; #else # include # include #endif #ifdef XP_LINUX # include # include #endif #include #include #include #include #include "mozilla/Assertions.h" #include "mozilla/MathAlgorithms.h" #include "mozilla/Maybe.h" #include "FdPrintf.h" using namespace mozilla; static void die(const char* message) { /* Here, it doesn't matter that fprintf may allocate memory. */ fprintf(stderr, "%s\n", message); exit(1); } #ifdef XP_LINUX static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }(); #endif /* We don't want to be using malloc() to allocate our internal tracking * data, because that would change the parameters of what is being measured, * so we want to use data types that directly use mmap/VirtualAlloc. 
*/ template class MappedArray { public: MappedArray() : mPtr(nullptr) { #ifdef XP_LINUX MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)), "MappedArray size must be a multiple of the page size"); #endif } ~MappedArray() { if (mPtr) { #ifdef _WIN32 VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE); #elif defined(XP_LINUX) munmap(reinterpret_cast(reinterpret_cast(mPtr) - sPageSize), sizeof(T) * Len + sPageSize * 2); #else munmap(mPtr, sizeof(T) * Len); #endif } } T& operator[](size_t aIndex) const { if (mPtr) { return mPtr[aIndex]; } #ifdef _WIN32 mPtr = reinterpret_cast(VirtualAlloc( nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); if (mPtr == nullptr) { die("VirtualAlloc error"); } #else size_t data_size = sizeof(T) * Len; size_t size = data_size; # ifdef XP_LINUX // See below size += sPageSize * 2; # endif mPtr = reinterpret_cast(mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0)); if (mPtr == MAP_FAILED) { die("Mmap error"); } # ifdef XP_LINUX // On Linux we request a page on either side of the allocation and // mprotect them. This prevents mappings in /proc/self/smaps from being // merged and allows us to parse this file to calculate the allocator's RSS. MOZ_ASSERT(0 == mprotect(mPtr, sPageSize, 0)); MOZ_ASSERT(0 == mprotect(reinterpret_cast( reinterpret_cast(mPtr) + data_size + sPageSize), sPageSize, 0)); mPtr = reinterpret_cast(reinterpret_cast(mPtr) + sPageSize); # endif #endif return mPtr[aIndex]; } bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; } bool allocated() const { return !!mPtr; } private: mutable T* mPtr; }; /* Type for records of allocations. */ struct MemSlot { void* mPtr; // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared // when memory is freed or realloc()ed. size_t mRequest; }; /* An almost infinite list of slots. * In essence, this is a linked list of arrays of groups of slots. * Each group is 1MB. 
On 64-bits, one group allows to store 64k allocations. * Each MemSlotList instance can store 1023 such groups, which means more * than 67M allocations. In case more would be needed, we chain to another * MemSlotList, and so on. * Using 1023 groups makes the MemSlotList itself page sized on 32-bits * and 2 pages-sized on 64-bits. */ class MemSlotList { static constexpr size_t kGroups = 1024 - 1; static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot); MappedArray mSlots[kGroups]; MappedArray mNext; public: MemSlot& operator[](size_t aIndex) const { if (aIndex < kGroupSize * kGroups) { return mSlots[aIndex / kGroupSize][aIndex % kGroupSize]; } aIndex -= kGroupSize * kGroups; return mNext[0][aIndex]; } // Ask if any of the memory-mapped buffers use this range. bool ownsMapping(uintptr_t aStart) const { for (const auto& slot : mSlots) { if (slot.allocated() && slot.ownsMapping(aStart)) { return true; } } return mNext.ownsMapping(aStart) || (mNext.allocated() && mNext[0].ownsMapping(aStart)); } }; /* Helper class for memory buffers */ class Buffer { public: Buffer() : mBuf(nullptr), mLength(0) {} Buffer(const void* aBuf, size_t aLength) : mBuf(reinterpret_cast(aBuf)), mLength(aLength) {} /* Constructor for string literals. */ template explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {} /* Returns a sub-buffer up-to but not including the given aNeedle character. * The "parent" buffer itself is altered to begin after the aNeedle * character. * If the aNeedle character is not found, return the entire buffer, and empty * the "parent" buffer. */ Buffer SplitChar(char aNeedle) { char* buf = const_cast(mBuf); char* c = reinterpret_cast(memchr(buf, aNeedle, mLength)); if (!c) { return Split(mLength); } Buffer result = Split(c - buf); // Remove the aNeedle character itself. Split(1); return result; } // Advance to the position after aNeedle. This is like SplitChar but does not // return the skipped portion. 
void Skip(char aNeedle, unsigned nTimes = 1) { for (unsigned i = 0; i < nTimes; i++) { SplitChar(aNeedle); } } void SkipWhitespace() { while (mLength > 0) { if (!IsSpace(mBuf[0])) { break; } mBuf++; mLength--; } } static bool IsSpace(char c) { switch (c) { case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': return true; } return false; } /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is * amputated of those aLength characters. If the "parent" buffer is smaller * than aLength, then its length is used instead. */ Buffer Split(size_t aLength) { Buffer result(mBuf, std::min(aLength, mLength)); mLength -= result.mLength; mBuf += result.mLength; return result; } /* Move the buffer (including its content) to the memory address of the aOther * buffer. */ void Slide(Buffer aOther) { memmove(const_cast(aOther.mBuf), mBuf, mLength); mBuf = aOther.mBuf; } /* Returns whether the two involved buffers have the same content. */ bool operator==(Buffer aOther) { return mLength == aOther.mLength && (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength)); } bool operator!=(Buffer aOther) { return !(*this == aOther); } /* Returns true if the buffer is not empty. */ explicit operator bool() { return mLength; } char operator[](size_t n) const { return mBuf[n]; } /* Returns the memory location of the buffer. */ const char* get() { return mBuf; } /* Returns the memory location of the end of the buffer (technically, the * first byte after the buffer). */ const char* GetEnd() { return mBuf + mLength; } /* Extend the buffer over the content of the other buffer, assuming it is * adjacent. */ void Extend(Buffer aOther) { MOZ_ASSERT(aOther.mBuf == GetEnd()); mLength += aOther.mLength; } size_t Length() const { return mLength; } private: const char* mBuf; size_t mLength; }; /* Helper class to read from a file descriptor line by line. 
 */
class FdReader {
 public:
  // aNeedClose: whether this reader owns aFd and must close() it on
  // destruction.
  explicit FdReader(int aFd, bool aNeedClose = false)
      : mFd(aFd),
        mNeedClose(aNeedClose),
        mData(&mRawBuf, 0),
        mBuf(&mRawBuf, sizeof(mRawBuf)) {}

  // Move constructor: takes over the fd and copies the raw buffer.
  // NOTE(review): mData is reset to empty rather than rebased onto the new
  // mRawBuf, so any buffered-but-unreturned data in aOther is dropped —
  // confirm moves only happen before the first ReadLine.
  FdReader(FdReader&& aOther) noexcept
      : mFd(aOther.mFd),
        mNeedClose(aOther.mNeedClose),
        mData(&mRawBuf, 0),
        mBuf(&mRawBuf, sizeof(mRawBuf)) {
    memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf));
    // Neutralize the source so its destructor won't close our fd.
    aOther.mFd = -1;
    aOther.mNeedClose = false;
    aOther.mData = Buffer();
    aOther.mBuf = Buffer();
  }

  FdReader& operator=(const FdReader&) = delete;
  FdReader(const FdReader&) = delete;

  ~FdReader() {
    if (mNeedClose) {
      close(mFd);
    }
  }

  /* Read a line from the file descriptor and returns it as a Buffer instance
   */
  Buffer ReadLine() {
    while (true) {
      Buffer result = mData.SplitChar('\n');

      /* There are essentially three different cases here:
       * - '\n' was found "early". In this case, the end of the result buffer
       *   is before the beginning of the mData buffer (since SplitChar
       *   amputated it).
       * - '\n' was found as the last character of mData. In this case, mData
       *   is empty, but still points at the end of mBuf. result points to what
       *   used to be in mData, without the last character.
       * - '\n' was not found. In this case too, mData is empty and points at
       *   the end of mBuf. But result points to the entire buffer that used to
       *   be pointed by mData.
       * Only in the latter case do both result and mData's end match, and it's
       * the only case where we need to refill the buffer. */
      if (result.GetEnd() != mData.GetEnd()) {
        return result;
      }

      /* Since SplitChar emptied mData, make it point to what it had before. */
      mData = result;

      /* And move it to the beginning of the read buffer. */
      mData.Slide(mBuf);

      FillBuffer();

      // End of file (FillBuffer added nothing): no more lines.
      if (!mData) {
        return Buffer();
      }
    }
  }

 private:
  /* Fill the read buffer.
*/ void FillBuffer() { size_t size = mBuf.GetEnd() - mData.GetEnd(); Buffer remainder(mData.GetEnd(), size); ssize_t len = 1; while (remainder && len > 0) { len = ::read(mFd, const_cast(remainder.get()), size); if (len < 0) { die("Read error"); } size -= len; mData.Extend(remainder.Split(len)); } } /* File descriptor to read from. */ int mFd; bool mNeedClose; /* Part of data that was read from the file descriptor but not returned with * ReadLine yet. */ Buffer mData; /* Buffer representation of mRawBuf */ Buffer mBuf; /* read() buffer */ char mRawBuf[4096]; }; MOZ_BEGIN_EXTERN_C /* Function declarations for all the replace_malloc _impl functions. * See memory/build/replace_malloc.c */ #define MALLOC_DECL(name, return_type, ...) \ return_type name##_impl(__VA_ARGS__); #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC #include "malloc_decls.h" #define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__); #define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC #include "malloc_decls.h" #ifdef ANDROID /* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have. * While gecko has one in libmozglue, the replay program can't use that. * Since we're not going to fork anyways, make it a dummy function. */ int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void), void (*aChild)(void)) { return 0; } #endif MOZ_END_EXTERN_C template size_t parseNumber(Buffer aBuf) { if (!aBuf) { die("Malformed input"); } size_t result = 0; for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) { result *= Base; if ((*c >= '0' && *c <= '9')) { result += *c - '0'; } else if (Base == 16 && *c >= 'a' && *c <= 'f') { result += *c - 'a' + 10; } else if (Base == 16 && *c >= 'A' && *c <= 'F') { result += *c - 'A' + 10; } else { die("Malformed input"); } } return result; } static size_t percent(size_t a, size_t b) { if (!b) { return 0; } return size_t(round(double(a) / double(b) * 100.0)); } class Distribution { public: // Default constructor used for array initialisation. 
  // Default constructor used for array initialisation.
  // A zero mMaxSize marks the distribution as uninitialised (see mMaxSize
  // comment below); addRequest/printDist assert against that.
  Distribution()
      : mMaxSize(0),
        mNextSmallest(0),
        mShift(0),
        mArrayOffset(0),
        mArraySlots(0),
        mTotalRequests(0),
        mRequests{0} {}

  // max_size: largest request size covered by this distribution (the bin
  //   size).
  // next_smallest: largest size belonging to the previous, smaller bin.
  // bucket_size: width of each histogram bucket (rounded up to a power of
  //   two via the shift).
  Distribution(size_t max_size, size_t next_smallest, size_t bucket_size)
      : mMaxSize(max_size),
        mNextSmallest(next_smallest),
        mShift(CeilingLog2(bucket_size)),
        mArrayOffset(1 + next_smallest),
        mArraySlots((max_size - next_smallest) >> mShift),
        mTotalRequests(0),
        mRequests{
            0,
        } {
    MOZ_ASSERT(mMaxSize);
    MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS);
  }

  Distribution& operator=(const Distribution& aOther) = default;

  // Record one allocation request of the given size. The caller is
  // responsible for routing the request to the right Distribution
  // (mNextSmallest < request <= mMaxSize).
  void addRequest(size_t request) {
    MOZ_ASSERT(mMaxSize);

    mRequests[(request - mArrayOffset) >> mShift]++;
    mTotalRequests++;
  }

  // Print the histogram (bucket ranges, counts, percentages) to std_err.
  void printDist(intptr_t std_err) {
    MOZ_ASSERT(mMaxSize);

    // The translation to turn a slot index into a memory request size.
    const size_t array_offset_add = (1 << mShift) + mNextSmallest;

    FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize);
    FdPrintf(std_err, "      request   :  count percent\n");
    size_t range_start = mNextSmallest + 1;
    for (size_t j = 0; j < mArraySlots; j++) {
      size_t range_end = (j << mShift) + array_offset_add;
      FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end,
               mRequests[j], percent(mRequests[j], mTotalRequests));
      range_start = range_end + 1;
    }
  }

  size_t maxSize() const { return mMaxSize; }

 private:
  static constexpr size_t MAX_NUM_BUCKETS = 16;

  // If size is zero this distribution is uninitialised.
  size_t mMaxSize;
  size_t mNextSmallest;

  // Parameters to convert a size into a slot number.
  unsigned mShift;
  unsigned mArrayOffset;

  // The number of slots.
unsigned mArraySlots; size_t mTotalRequests; size_t mRequests[MAX_NUM_BUCKETS]; }; #ifdef XP_LINUX struct MemoryMap { uintptr_t mStart; uintptr_t mEnd; bool mReadable; bool mPrivate; bool mAnon; bool mIsStack; bool mIsSpecial; size_t mRSS; bool IsCandidate() const { // Candidates mappings are: // * anonymous // * they are private (not shared), // * anonymous or "[heap]" (not another area such as stack), // // The only mappings we're falsely including are the .bss segments for // shared libraries. return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial; } }; class SMapsReader : private FdReader { private: explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {} public: static Maybe open() { int fd = ::open(FILENAME, O_RDONLY); if (fd < 0) { perror(FILENAME); return mozilla::Nothing(); } return Some(SMapsReader(FdReader(fd, true))); } Maybe readMap(intptr_t aStdErr) { // This is not very tolerant of format changes because things like // parseNumber will crash if they get a bad value. TODO: make this // soft-fail. Buffer line = ReadLine(); if (!line) { return Nothing(); } // We're going to be at the start of an entry, start tokenising the first // line. // Range Buffer range = line.SplitChar(' '); uintptr_t range_start = parseNumber<16>(range.SplitChar('-')); uintptr_t range_end = parseNumber<16>(range); // Mode. Buffer mode = line.SplitChar(' '); if (mode.Length() != 4) { FdPrintf(aStdErr, "Couldn't parse SMAPS file\n"); return Nothing(); } bool readable = mode[0] == 'r'; bool private_ = mode[3] == 'p'; // Offset, device and inode. line.SkipWhitespace(); bool zero_offset = !parseNumber<16>(line.SplitChar(' ')); line.SkipWhitespace(); bool no_device = line.SplitChar(' ') == Buffer("00:00"); line.SkipWhitespace(); bool zero_inode = !parseNumber(line.SplitChar(' ')); bool is_anon = zero_offset && no_device && zero_inode; // Filename, or empty for anon mappings. 
line.SkipWhitespace();
    Buffer filename = line.SplitChar(' ');

    bool is_stack;
    bool is_special;
    if (filename && filename[0] == '[') {
      is_stack = filename == Buffer("[stack]");
      is_special = filename == Buffer("[vdso]") ||
                   filename == Buffer("[vvar]") ||
                   filename == Buffer("[vsyscall]");
    } else {
      is_stack = false;
      is_special = false;
    }

    size_t rss = 0;
    // Scan the detail lines of this entry for the Rss field; "VmFlags" is the
    // last field of an entry, so stop there.
    while ((line = ReadLine())) {
      Buffer field = line.SplitChar(':');
      if (field == Buffer("VmFlags")) {
        // This is the last field, at least in the current format. Break this
        // loop to read the next mapping.
        break;
      }

      if (field == Buffer("Rss")) {
        line.SkipWhitespace();
        Buffer value = line.SplitChar(' ');
        // smaps reports Rss in kB; convert to bytes.
        rss = parseNumber(value) * 1024;
      }
    }

    return Some(MemoryMap({range_start, range_end, readable, private_, is_anon,
                           is_stack, is_special, rss}));
  }

  static constexpr char FILENAME[] = "/proc/self/smaps";
};
#endif  // XP_LINUX

/* Class to handle dispatching the replay function calls to replace-malloc. */
class Replay {
 public:
  Replay() {
#ifdef _WIN32
    // See comment in FdPrintf.h as to why native win32 handles are used.
mStdErr = reinterpret_cast(GetStdHandle(STD_ERROR_HANDLE)); #else mStdErr = fileno(stderr); #endif #ifdef XP_LINUX BuildInitialMapInfo(); #endif } void enableSlopCalculation() { mCalculateSlop = true; } void enableMemset() { mDoMemset = true; } MemSlot& operator[](size_t index) const { return mSlots[index]; } void malloc(Buffer& aArgs, Buffer& aResult) { MemSlot& aSlot = SlotForResult(aResult); mOps++; size_t size = parseNumber(aArgs); aSlot.mPtr = ::malloc_impl(size); if (aSlot.mPtr) { aSlot.mRequest = size; MaybeCommit(aSlot); if (mCalculateSlop) { mTotalRequestedSize += size; mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); } } } void posix_memalign(Buffer& aArgs, Buffer& aResult) { MemSlot& aSlot = SlotForResult(aResult); mOps++; size_t alignment = parseNumber(aArgs.SplitChar(',')); size_t size = parseNumber(aArgs); void* ptr; if (::posix_memalign_impl(&ptr, alignment, size) == 0) { aSlot.mPtr = ptr; aSlot.mRequest = size; MaybeCommit(aSlot); if (mCalculateSlop) { mTotalRequestedSize += size; mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); } } else { aSlot.mPtr = nullptr; } } void aligned_alloc(Buffer& aArgs, Buffer& aResult) { MemSlot& aSlot = SlotForResult(aResult); mOps++; size_t alignment = parseNumber(aArgs.SplitChar(',')); size_t size = parseNumber(aArgs); aSlot.mPtr = ::aligned_alloc_impl(alignment, size); if (aSlot.mPtr) { aSlot.mRequest = size; MaybeCommit(aSlot); if (mCalculateSlop) { mTotalRequestedSize += size; mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); } } } void calloc(Buffer& aArgs, Buffer& aResult) { MemSlot& aSlot = SlotForResult(aResult); mOps++; size_t num = parseNumber(aArgs.SplitChar(',')); size_t size = parseNumber(aArgs); aSlot.mPtr = ::calloc_impl(num, size); if (aSlot.mPtr) { aSlot.mRequest = num * size; MaybeCommit(aSlot); if (mCalculateSlop) { mTotalRequestedSize += num * size; mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); } } } void realloc(Buffer& aArgs, Buffer& aResult) 
{
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    // The old pointer argument is logged as "#<slot>"; anything before the
    // '#' is unexpected.
    Buffer dummy = aArgs.SplitChar('#');
    if (dummy) {
      die("Malformed input");
    }
    size_t slot_id = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    MemSlot& old_slot = (*this)[slot_id];
    void* old_ptr = old_slot.mPtr;
    // The old slot no longer owns the memory, whatever realloc does with it.
    old_slot.mPtr = nullptr;
    aSlot.mPtr = ::realloc_impl(old_ptr, size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void free(Buffer& aArgs, Buffer& aResult) {
    // free() has no result in the log.
    if (aResult) {
      die("Malformed input");
    }
    mOps++;
    // The argument is "#<slot>".
    Buffer dummy = aArgs.SplitChar('#');
    if (dummy) {
      die("Malformed input");
    }
    size_t slot_id = parseNumber(aArgs);
    MemSlot& slot = (*this)[slot_id];
    ::free_impl(slot.mPtr);
    slot.mPtr = nullptr;
  }

  void memalign(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t alignment = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::memalign_impl(alignment, size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void valloc(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::valloc_impl(size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  // Dump allocator statistics plus the replay's own slop/distribution
  // accounting to stderr.
  void jemalloc_stats(Buffer& aArgs, Buffer& aResult) {
    if (aArgs || aResult) {
      die("Malformed input");
    }
    mOps++;
    jemalloc_stats_t stats;
    // Using a variable length array here is a GCC & Clang extension. But it
    // allows us to place this on the stack and not alter jemalloc's profiling.
const size_t num_bins = ::jemalloc_stats_num_bins();
    const size_t MAX_NUM_BINS = 100;
    if (num_bins > MAX_NUM_BINS) {
      die("Exceeded maximum number of jemalloc stats bins");
    }
    jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}};
    ::jemalloc_stats_internal(&stats, bin_stats);

#ifdef XP_LINUX
    size_t rss = get_rss();
#endif

    // Walk every live slot and accumulate usage/slop, attributing each
    // allocation to its small bin, or to the large/huge buckets.
    size_t num_objects = 0;
    size_t num_sloppy_objects = 0;
    size_t total_allocated = 0;
    size_t total_slop = 0;
    size_t large_slop = 0;
    size_t large_used = 0;
    size_t huge_slop = 0;
    size_t huge_used = 0;
    size_t bin_slop[MAX_NUM_BINS] = {0};

    for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
      MemSlot& slot = mSlots[slot_id];
      if (slot.mPtr) {
        size_t used = ::malloc_usable_size_impl(slot.mPtr);
        // Slop is the difference between what was handed out and what was
        // asked for.
        size_t slop = used - slot.mRequest;
        total_allocated += used;
        total_slop += slop;
        num_objects++;
        if (slop) {
          num_sloppy_objects++;
        }

        // Small allocation: find the matching bin by size.
        if (used <=
            (stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) {
          // We know that this is an inefficient linear search, but there's a
          // small number of bins and this is simple.
for (unsigned i = 0; i < num_bins; i++) {
            auto& bin = bin_stats[i];
            if (used == bin.size) {
              bin_slop[i] += slop;
              break;
            }
          }
        } else if (used <= stats.large_max) {
          large_slop += slop;
          large_used += used;
        } else {
          huge_slop += slop;
          huge_used += used;
        }
      }
    }

    // This formula corresponds to the calculation of wasted (from committed
    // and the other parameters) within jemalloc_stats()
    size_t committed = stats.allocated + stats.waste + stats.page_cache +
                       stats.bookkeeping + stats.bin_unused;

    FdPrintf(mStdErr, "\n");
    FdPrintf(mStdErr, "Objects:          %9zu\n", num_objects);
    FdPrintf(mStdErr, "Slots:            %9zu\n", mNumUsedSlots);
    FdPrintf(mStdErr, "Ops:              %9zu\n", mOps);
    FdPrintf(mStdErr, "mapped:           %9zu\n", stats.mapped);
    FdPrintf(mStdErr, "committed:        %9zu\n", committed);
#ifdef XP_LINUX
    // rss is zero when smaps parsing failed (see get_rss).
    if (rss) {
      FdPrintf(mStdErr, "rss:              %9zu\n", rss);
    }
#endif
    FdPrintf(mStdErr, "allocated:        %9zu\n", stats.allocated);
    FdPrintf(mStdErr, "waste:            %9zu\n", stats.waste);
    FdPrintf(mStdErr, "dirty:            %9zu\n", stats.page_cache);
    FdPrintf(mStdErr, "bookkeep:         %9zu\n", stats.bookkeeping);
    FdPrintf(mStdErr, "bin-unused:       %9zu\n", stats.bin_unused);
    FdPrintf(mStdErr, "quantum-max:      %9zu\n", stats.quantum_max);
    FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max);
    FdPrintf(mStdErr, "subpage-max:      %9zu\n", stats.subpage_max);
    FdPrintf(mStdErr, "large-max:        %9zu\n", stats.large_max);
    if (mCalculateSlop) {
      // Slop accumulated over the whole run (-s option), as opposed to the
      // live-object slop below.
      size_t slop = mTotalAllocatedSize - mTotalRequestedSize;
      FdPrintf(mStdErr,
               "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n",
               slop / 1024, mTotalAllocatedSize / 1024,
               percent(slop, mTotalAllocatedSize));
    }
    FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n",
             num_sloppy_objects, num_objects,
             percent(num_sloppy_objects, num_objects));
    FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n",
             total_slop / 1024, total_allocated / 1024,
             percent(total_slop, total_allocated));

    // Per-bin table: (c) columns are in bytes ("chars"), (r) columns count
    // runs.
    FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size",
             "unused (c)", "total (c)", "used (c)", "non-full (r)",
"total (r)", "used (r)"); for (unsigned i = 0; i < num_bins; i++) { auto& bin = bin_stats[i]; MOZ_ASSERT(bin.size); FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n", bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024, percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total), bin.num_non_full_runs, bin.num_runs, percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs)); } FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent"); for (unsigned i = 0; i < num_bins; i++) { auto& bin = bin_stats[i]; size_t used = bin.bytes_total - bin.bytes_unused; FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], used, percent(bin_slop[i], used)); } FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used, percent(large_slop, large_used)); FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used, percent(huge_slop, huge_used)); print_distributions(stats, bin_stats); } private: /* * Create and print frequency distributions of memory requests. */ void print_distributions(jemalloc_stats_t& stats, jemalloc_bin_stats_t* bin_stats) { const size_t num_bins = ::jemalloc_stats_num_bins(); // We compute distributions for all of the bins for small allocations // (num_bins) plus two more distributions for larger allocations. Distribution dists[num_bins + 2]; unsigned last_size = 0; unsigned num_dists = 0; for (unsigned i = 0; i < num_bins; i++) { auto& bin = bin_stats[i]; auto& dist = dists[num_dists++]; MOZ_ASSERT(bin.size); if (bin.size <= 16) { // 1 byte buckets. dist = Distribution(bin.size, last_size, 1); } else if (bin.size <= stats.quantum_max) { // 4 buckets, (4 bytes per bucket with a 16 byte quantum). dist = Distribution(bin.size, last_size, stats.quantum / 4); } else if (bin.size <= stats.quantum_wide_max) { // 8 buckets, (32 bytes per bucket with a 256 byte quantum-wide). dist = Distribution(bin.size, last_size, stats.quantum_wide / 8); } else { // 16 buckets. 
dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16); } last_size = bin.size; } // 16 buckets. dists[num_dists] = Distribution(stats.page_size, last_size, (stats.page_size - last_size) / 16); num_dists++; // Buckets are 1/4 of the page size (12 buckets). dists[num_dists] = Distribution(stats.page_size * 4, stats.page_size, stats.page_size / 4); num_dists++; MOZ_RELEASE_ASSERT(num_dists <= num_bins + 2); for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { MemSlot& slot = mSlots[slot_id]; if (slot.mPtr) { for (size_t i = 0; i < num_dists; i++) { if (slot.mRequest <= dists[i].maxSize()) { dists[i].addRequest(slot.mRequest); break; } } } } for (unsigned i = 0; i < num_dists; i++) { dists[i].printDist(mStdErr); } } #ifdef XP_LINUX size_t get_rss() { if (mGetRSSFailed) { return 0; } // On Linux we can determine the RSS of the heap area by examining the // smaps file. mozilla::Maybe reader = SMapsReader::open(); if (!reader) { mGetRSSFailed = true; return 0; } size_t rss = 0; while (Maybe map = reader->readMap(mStdErr)) { if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) && !InitialMapsContains(map->mStart)) { rss += map->mRSS; } } return rss; } bool InitialMapsContains(uintptr_t aRangeStart) { for (unsigned i = 0; i < mNumInitialMaps; i++) { MOZ_ASSERT(i < MAX_INITIAL_MAPS); if (mInitialMaps[i] == aRangeStart) { return true; } } return false; } public: void BuildInitialMapInfo() { if (mGetRSSFailed) { return; } Maybe reader = SMapsReader::open(); if (!reader) { mGetRSSFailed = true; return; } while (Maybe map = reader->readMap(mStdErr)) { if (map->IsCandidate()) { if (mNumInitialMaps >= MAX_INITIAL_MAPS) { FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n"); mGetRSSFailed = false; return; } mInitialMaps[mNumInitialMaps++] = map->mStart; } } } #endif private: MemSlot& SlotForResult(Buffer& aResult) { /* Parse result value and get the corresponding slot. 
 */
    // The result is logged as "=#<slot>"; reject anything else.
    Buffer dummy = aResult.SplitChar('=');
    Buffer dummy2 = aResult.SplitChar('#');
    if (dummy || dummy2) {
      die("Malformed input");
    }

    size_t slot_id = parseNumber(aResult);
    // Track the high-water mark so iteration only touches initialised slots.
    mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1);

    return mSlots[slot_id];
  }

  // Touch the freshly-allocated memory when -c was given, so pages are
  // actually committed.
  void MaybeCommit(MemSlot& aSlot) {
    if (mDoMemset) {
      // Write any byte, 0x55 isn't significant.
      memset(aSlot.mPtr, 0x55, aSlot.mRequest);
    }
  }

  intptr_t mStdErr;
  size_t mOps = 0;

  // The number of slots that have been used. It is used to iterate over slots
  // without accessing those we haven't initialised.
  size_t mNumUsedSlots = 0;

  MemSlotList mSlots;
  size_t mTotalRequestedSize = 0;
  size_t mTotalAllocatedSize = 0;
  // Whether to calculate slop for all allocations over the runtime of a
  // process.
  bool mCalculateSlop = false;
  bool mDoMemset = false;

#ifdef XP_LINUX
  // If we have a failure reading smaps info then this is used to disable that
  // feature.
  bool mGetRSSFailed = false;

  // The initial memory mappings are recorded here at start up.  We exclude
  // memory in these mappings when computing RSS.  We assume they do not grow
  // and that no regions are allocated near them, this is true because they'll
  // only record the .bss and .data segments from our binary and shared objects
  // or regions that logalloc-replay has created for MappedArrays.
  //
  // 64 should be enough for anybody.
  static constexpr unsigned MAX_INITIAL_MAPS = 64;
  uintptr_t mInitialMaps[MAX_INITIAL_MAPS];
  unsigned mNumInitialMaps = 0;
#endif  // XP_LINUX
};

static Replay replay;

int main(int argc, const char* argv[]) {
  size_t first_pid = 0;
  // Read the allocation log from stdin.
  FdReader reader(0);

  for (int i = 1; i < argc; i++) {
    const char* option = argv[i];
    if (strcmp(option, "-s") == 0) {
      // Do accounting to calculate allocation slop.
      replay.enableSlopCalculation();
    } else if (strcmp(option, "-c") == 0) {
      // Touch memory as we allocate it.
replay.enableMemset();
    } else {
      fprintf(stderr, "Unknown command line option: %s\n", option);
      return EXIT_FAILURE;
    }
  }

  /* Read log from stdin and dispatch function calls to the Replay instance.
   * The log format is essentially:
   *   <pid> <tid> <function>([<args>])[=<result>]
   * where <args> is a comma separated list of arguments.
   * (NOTE(review): the bracketed placeholders in this comment had been
   * stripped by the same pass that removed the #include targets; restored
   * from the parsing code below.)
   *
   * The logs are expected to be preprocessed so that allocations are
   * attributed a tracking slot. The input is trusted not to have crazy
   * values for these slot numbers.
   *
   * <result>, as well as some of the args to some of the function calls are
   * such slot numbers. */
  while (true) {
    Buffer line = reader.ReadLine();

    if (!line) {
      break;
    }

    size_t pid = parseNumber(line.SplitChar(' '));
    if (!first_pid) {
      first_pid = pid;
    }

    /* The log may contain data for several processes, only entries for the
     * very first that appears are treated. */
    if (first_pid != pid) {
      continue;
    }

    /* The log contains thread ids for manual analysis, but we just ignore them
     * for now. */
    parseNumber(line.SplitChar(' '));

    Buffer func = line.SplitChar('(');
    Buffer args = line.SplitChar(')');

    // Dispatch to the matching Replay handler; the remainder of the line is
    // the "=#<slot>" result, if any.
    if (func == Buffer("jemalloc_stats")) {
      replay.jemalloc_stats(args, line);
    } else if (func == Buffer("free")) {
      replay.free(args, line);
    } else if (func == Buffer("malloc")) {
      replay.malloc(args, line);
    } else if (func == Buffer("posix_memalign")) {
      replay.posix_memalign(args, line);
    } else if (func == Buffer("aligned_alloc")) {
      replay.aligned_alloc(args, line);
    } else if (func == Buffer("calloc")) {
      replay.calloc(args, line);
    } else if (func == Buffer("realloc")) {
      replay.realloc(args, line);
    } else if (func == Buffer("memalign")) {
      replay.memalign(args, line);
    } else if (func == Buffer("valloc")) {
      replay.valloc(args, line);
    } else {
      die("Malformed input");
    }
  }

  return 0;
}