diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:44:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:44:51 +0000 |
commit | 9e3c08db40b8916968b9f30096c7be3f00ce9647 (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /tools/jprof | |
parent | Initial commit. (diff) | |
download | thunderbird-9e3c08db40b8916968b9f30096c7be3f00ce9647.tar.xz thunderbird-9e3c08db40b8916968b9f30096c7be3f00ce9647.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | tools/jprof/README.html | 330 | ||||
-rw-r--r-- | tools/jprof/bfd.cpp | 221 | ||||
-rw-r--r-- | tools/jprof/coff.cpp | 93 | ||||
-rw-r--r-- | tools/jprof/elf.cpp | 128 | ||||
-rw-r--r-- | tools/jprof/intcnt.cpp | 69 | ||||
-rw-r--r-- | tools/jprof/intcnt.h | 39 | ||||
-rwxr-xr-x | tools/jprof/jprofsig | 46 | ||||
-rw-r--r-- | tools/jprof/leaky.cpp | 827 | ||||
-rw-r--r-- | tools/jprof/leaky.h | 124 | ||||
-rw-r--r-- | tools/jprof/moz.build | 28 | ||||
-rwxr-xr-x | tools/jprof/split-profile.py | 156 | ||||
-rw-r--r-- | tools/jprof/strset.cpp | 37 | ||||
-rw-r--r-- | tools/jprof/strset.h | 19 | ||||
-rw-r--r-- | tools/jprof/stub/Makefile.in | 8 | ||||
-rw-r--r-- | tools/jprof/stub/config.h | 18 | ||||
-rw-r--r-- | tools/jprof/stub/jprof.h | 17 | ||||
-rw-r--r-- | tools/jprof/stub/libmalloc.cpp | 740 | ||||
-rw-r--r-- | tools/jprof/stub/libmalloc.h | 45 | ||||
-rw-r--r-- | tools/jprof/stub/moz.build | 17 |
19 files changed, 2962 insertions, 0 deletions
diff --git a/tools/jprof/README.html b/tools/jprof/README.html new file mode 100644 index 0000000000..ac25acc5ef --- /dev/null +++ b/tools/jprof/README.html @@ -0,0 +1,330 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<html> +<head><title>The Jprof Profiler</title></head> + +<body bgcolor="#FFFFFF" text="#000000" + link="#0000EE" vlink="#551A8B" alink="#FF0000"> +<center> +<h1>The Jprof Profiler</h1> +<font size="-1"> +<a href="mailto:jim_nance%yahoo.com">jim_nance@yahoo.com</a><p> +Recent (4/2011) updates Randell Jesup (see bugzilla for contact info) +</font> +<hr> + +<a href="#introduction">Introduction</a> | <a href="#operation">Operation</a> | +<a href="#setup">Setup</a> | <a href="#usage">Usage</a> | +<a href="#interpretation">Interpretation</a> + +</center> +<hr> + +<h3><a name="introduction">Introduction</a></h3> + +Jprof is a profiling tool. I am writing it because I need to find out +where mozilla is spending its time, and there do not seem to be any +profilers for Linux that can handle threads and/or shared libraries. +This code is based heavily on Kipp Hickman's leaky. + +<h3><a name="operation">Operation</a></h3> + +Jprof operates by installing a timer which periodically interrupts mozilla. +When this timer goes off, the jprof code inside mozilla walks the function call +stack to determine which code was executing and saves the results into the +<code>jprof-log</code> and <code>jprof-map</code> files. By collecting a large +number of these call stacks, it is possible to deduce where mozilla is spending +its time. + +<h3><a name="setup">Setup</a></h3> + +<p>Configure your mozilla with jprof support by adding +<code>--enable-jprof</code> to your configure options (eg adding +<code>ac_add_options --enable-jprof</code> to your <code>.mozconfig</code>) and +making sure that you do <strong>not</strong> have the +<code>--enable-strip</code> configure option set -- jprof needs symbols to +operate. On many architectures with GCC, you'll need to add +<code>--enable-optimize="-O3 -fno-omit-frame-pointer"</code> or the +equivalent to ensure frame pointer generation in the compiler you're using.</p> + +<p>Finally, build mozilla with your new configuration. Now you can run jprof.</p> + +<h3><a name="usage">Usage</a></h3> +<pre> jprof [-v] [-t] [-e exclude] [-i include] [-s stackdepth] [--last] [--all] [--start n [--end m]] [--output-dir dir] prog log [log2 ...]</pre> +Options: +<ul> + <li><b>-s depth</b> : Limit depth looked at from captured stack + frames</li> + <li><b>-v</b> : Output some information about the symbols, memory map, etc.</li> + <li><b>-t or --threads</b> : Group output according to thread. May require external + LD_PRELOAD library to help force sampling of spawned threads; jprof + may capture the main thread only. See <a + href="http://sam.zoy.org/writings/programming/gprof.html">gprof-helper</a>; + it may need adaption for jprof.</li> + <li><b>--only-thread id</b> : Only output data for thread 'id'</li> + <li><b>-e exclusion</b> : Allows excluding specific stack frames</li> + <li><b>-i inclusion</b> : Allows including specific stack frames</li> + <li><b>--last</b> : Only process data from the last 'section' of sampling + (starting at the last PROF)</li> + <li><b>--start N</b> : Start processing data at 'section' N </li> + <li><b>--end N</b> : Stop processing data at 'section' N </li> + <li><b>--output-dir dir</b> : Store generated .html files in the given directory </li> +</ul> +The behavior of jprof is determined by the value of the JPROF_FLAGS environment +variable. This environment variable can be composed of several substrings +which have the following meanings: +<ul> + <li> <b>JP_START</b> : Install the signal handler, and start sending the + timer signals. + + <li> <b>JP_DEFER</b> : Install the signal handler, but don't start sending + the timer signals. The user must start the signals by sending the first + one (with <code>kill -PROF</code>, or with <code>kill -ALRM</code> if + JP_REALTIME is used, or with <code>kill -POLL</code> (also known as <code>kill -IO</code>) if JP_RTC_HZ is used). + + <li> <b>JP_FIRST=x</b> : Wait x seconds before starting the timer + + <li> <b>JP_PERIOD=y</b> : Set timer to interrupt every y seconds. Only + values of y greater than or equal to 0.001 are supported. Default is + 0.050 (50ms). + + <li> <b>JP_REALTIME</b> : Do the profiling in intervals of real time rather + than intervals of time used by the mozilla process (and the kernel + when doing work for mozilla). This could probably lead to weird + results (you'll see whatever runs when mozilla is waiting for events), + but is needed to see time spent in the X server. + + <li> <b>JP_RTC_HZ=freq</b> : This option, only available on Linux if the + kernel is built with RTC support, makes jprof use the RTC timer instead of + using its own timer. This option, like JP_REALTIME, uses intervals of real + time. This option overrides JP_PERIOD. <code>freq</code> is the frequency + at which the timer should fire, measured in Hz. It must be a power of 2. + The maximal frequency allowed by the kernel can be changed by writing to + <code>/proc/sys/dev/rtc/max-user-freq</code>; the maximum value it can be + set to is 8192. Note that <code>/dev/rtc</code> will need to be readable + by the Firefox process; making that file world-readable is a simple way to + accomplish that. + + <li> <b>JP_CIRCULAR=size</b> : This tells jprof to store samples in a + circular buffer of the given size, which then will be saved (appended) + to disk when SIGUSR1 is received or JProfStopProfiling is done. If the + buffer overflows, the oldest entries will be evicted until there's + space for the new entry.<p> + + SIGUSR2 will cause the circular buffer to be cleared. + + <li> <b>JP_FILENAME=basefilename</b> : This is the filename used for + saving the log files to; the default is "jprof-log". If Electrolysis + is used, each process after the first will have the process ID + added ("jprof-log-3212"); + +</ul> + +<h4>Starting and stopping jprof from JavaScript</h4> +<p> +A build with jprof enabled adds four functions to the Window object:<p> +<code>JProfStartProfiling()</code> and <code>JProfStopProfiling()</code>: When used with JP_DEFER, these +allow one to start and stop the timer just around whatever critical section is +being profiled.</p><p> +<code>JProfClearCircular()</code> and <code>JProfSaveCircular()</code>: +These clear the circular buffer and save the buffer (without stopping), respectively.</p> + +<h4>Examples of JPROF_FLAGS usage</h4> +<ul> + + <li>To make the timer start firing 3 seconds after the program is started and + fire every 25 milliseconds of program time use: + <pre> + setenv JPROF_FLAGS "JP_START JP_FIRST=3 JP_PERIOD=0.025" </pre> + + <li>To make the timer start on your signal and fire every 1 millisecond of + program time use: + <pre> + setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.001" </pre> + + <li>To make the timer start on your signal and fire every 10 milliseconds of + wall-clock time use: + <pre> + setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.010 JP_REALTIME" </pre> + + <li>To make the timer start on your signal and fire at 8192 Hz in wall-clock + time use: + <pre> + setenv JPROF_FLAGS "JP_DEFER JP_RTC_HZ=8192" </pre> + + <li>To make the timer start on JProfStartProfiling() and run continously + with a 1ms sample rate until told to stop, then save the last 1MB of + data: + <pre> + setenv JPROF_FLAGS "JP_DEFER JP_CIRCULAR=1048576 JP_PERIOD=0.001" </pre> + +</ul> + +<h4>Pausing profiles</h4> + +<P>jprof can be paused at any time by sending a SIGUSR1 to mozilla (<code>kill +-USR1</code>). This will cause the timer signals to stop and jprof-map to be +written, but it will not close jprof-log. Combining SIGUSR1 with the JP_DEFER +option allows profiling of one sequence of actions by starting the timer right +before starting the actions and stopping the timer right afterward. + +<P>After a SIGUSR1, sending another timer signal (SIGPROF, SIGALRM, or SIGPOLL (aka SIGIO), +depending on the mode) can be used to continue writing data to the same +output. + +<P>SIGUSR2 will cause the circular buffer to be cleared, if it's in use. +This is useful right before running a test when you're using a large, +continuous circular buffer, or programmatically at the start of an action +which might take too long (JProfClearCircular()). + +<h4>Looking at the results</h4> + +Now that we have <code>jprof-log</code> and <code>jprof-map</code> files, we +can use the jprof executable is used to turn them into readable output. To do +this jprof needs the name of the mozilla binary and the log file. It deduces +the name of the map file: + +<pre> + ./jprof /home/user/mozilla/objdir/dist/bin/firefox ./jprof-log > tmp.html +</pre> + +This will generate the file <code>tmp.html</code> which you should view in a +web browser. + +<pre> + ./jprof --output-dir=/tmp /home/user/mozilla/objdir/dist/bin/firefox ./jprof-log* +</pre> + +This will generate a set of files in /tmp for each process. + + +<h3><a name="interpretation">Interpretation</a></h3> + + +The Jprof output is split into a flat portion and a hierarchical portion. +There are links to each section at the top of the page. It is typically +easier to analyze the profile by starting with the flat output and following +the links contained in the flat output up to the hierarchical output. + +<h4><a name="flat">Flat output</a></h3> + +The flat portion of the profile indicates which functions were executing +when the timer was going off. It is displayed as a list of functions names +on the right and the number of times that function was interrupted on the +left. The list is sorted by decreasing interrupt count. For example: + +<blockquote> <pre> +Total hit count: 151603 +Count %Total Function Name + +<a href="#23081">8806 5.8 __libc_poll</a> +<a href="#40008">2254 1.5 __i686.get_pc_thunk.bx</a> +<a href="#21390">2053 1.4 _int_malloc</a> +<a href="#49013">1777 1.2 ComputedStyle::GetStyleData(nsStyleStructID)</a> +<a href="#21380">1600 1.1 __libc_malloc</a> +<a href="#603">1552 1.0 nsCOMPtr_base::~nsCOMPtr_base()</a> +</pre> </blockquote> + +This shows that of the 151603 times the timer fired, 1777 (1.2% of the total) were inside ComputedStyle::GetStyleData() and 1552 (1.0% of the total) were in the nsCOMPtr_base destructor. + +<p> +In general, the functions with the highest count are the functions which +are taking the most time. + +<P> +The function names are linked to the entry for that function in the +hierarchical profile, which is described in the next section. + +<h4><a name="hier">Hierarchical output</a></h4> + +The hierarchical output is divided up into sections, with each section +corresponding to one function. A typical section looks something like +this: + +<blockquote><pre> + index Count Hits Function Name + <A href="#72871"> 545 (46.4%) nsBlockFrame::ReflowInlineFrames(nsBlockReflowState&, nsLineList_iterator, int*)</A> + <A href="#72873"> 100 (8.5%) nsBlockFrame::ReflowDirtyLines(nsBlockReflowState&)</A> + 72870 4 (0.3%) <a name=72870> 645 (54.9%)</a> <b>nsBlockFrame::DoReflowInlineFrames(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFlowAreaRect&, int&, nsFloatManager::SavedState*, int*, LineReflowStatus*, int)</b> + <A href="#72821"> 545 (46.4%) nsBlockFrame::ReflowInlineFrame(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsIFrame*, LineReflowStatus*)</A> + <A href="#72853"> 83 (7.1%) nsBlockFrame::PlaceLine(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFloatManager::SavedState*, nsRect&, int&, int*)</A> + <A href="#74150"> 9 (0.8%) nsLineLayout::BeginLineReflow(int, int, int, int, int, int)</A> + <A href="#74897"> 1 (0.1%) nsTextFrame::GetType() const</A> + <A href="#74131"> 1 (0.1%) nsLineLayout::RelativePositionFrames(nsOverflowAreas&)</A> + <A href="#58320"> 1 (0.1%) __i686.get_pc_thunk.bx</A> + <A href="#53077"> 1 (0.1%) PL_ArenaAllocate</A> +</pre></blockquote> + +The information this block tells us is: + +<ul> +<li>There were 4 profiler hits <em>in</em> <code>nsBlockFrame::DoReflowInlineFrames</code> +<li>There were 645 profiler hits <em>in or under</em> <code>nsBlockFrame::DoReflowInlineFrames</code>. Of these: +<ul> + <li>545 were in or under <code>nsBlockFrame::ReflowInlineFrame</code> + <li>83 were in or under <code>nsBlockFrame::PlaceLine</code> + <li>9 were in or under <code>nsLineLayout::BeginLineReflow</code> + <li>1 was in or under <code>nsTextFrame::GetType</code> + <li>1 was in or under <code>nsLineLayout::RelativePositionFrames</code> + <li>1 was in or under <code>__i686.get_pc_thunk.bx</code> + <li>1 was in or under <code>PL_ArenaAllocate</code> +</ul> +<li>Of these 645 calls into <code>nsBlockFrame::DoReflowInlineFrames</code>: +<ul> + <li>545 came from <code>nsBlockFrame::ReflowInlineFrames</code> + <li>100 came from <code>nsBlockFrame::ReflowDirtyLines</code> +</ul> +</ul> + + +The rest of this section explains how to read this information off from the jprof output. + +<p>This block corresponds to the function <code>nsBlockFrame::DoReflowInlineFrames</code>, which is +therefore bolded and not a link. The name of this function is preceded by +five numbers which have the following meaning. The number on the left (72870) +is the index number, and is not important. The next number (4) and the +percentage following (0.3%) are the number +of times this function was interrupted by the timer and the percentage of +the total hits that is. The last number pair ("645 (54.9%)") +are the number of times this function was in the call stack when the timer went +off. That is, the timer went off while we were in code that was ultimately +called from <code>nsBlockFrame::DoReflowInlineFrames</code>. +<p>For our example we can see that our function was in the call stack for +645 interrupt ticks, but we were only the function that was running when +the interrupt arrived 4 times. +<P> +The functions listed above the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its +callers. The numbers to the left of these function names are the numbers of +times these functions were in the call stack as callers of +<code>nsBlockFrame::DoReflowInlineFrames</code>. In our example, we were called 545 times by +<code>nsBlockFrame::ReflowInlineFrames</code> and 100 times by +<code>nsBlockFrame::ReflowDirtyLines</code>. +<P> +The functions listed below the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its +callees. The numbers to the left of the function names are the numbers of +times these functions were in the callstack as callees of +<code>nsBlockFrame::DoReflowInlineFrames</code> and the corresponding percentages. In our example, of the 645 profiler hits under <code>nsBlockFrame::DoReflowInlineFrames</code> 545 were under <code>nsBlockFrame::ReflowInlineFrame</code>, 83 were under <code>nsBlockFrame::PlaceLine</code>, and so forth.<p> + +<b>NOTE:</b> If there are loops of execution or recursion, the numbers will +not add up and percentages can exceed 100%. If a function directly calls +itself "(self)" will be appended to the line, but indirect recursion will +not be marked. + +<h3>Bugs</h3> +The current build of Jprof has only been tested under Ubuntu 8.04 LTS, but +should work under any fairly modern linux distribution using GCC/GLIBC. +Please update this document with any known compatibilities/incompatibilities. +<p> +If you get an error:<p><code>Inconsistency detected by ld.so: dl-open.c: 260: dl_open_worker: Assertion `_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT' failed! +</code><p>that means you've hit a timing hole in the version of glibc you're +running. See <a +href="http://sources.redhat.com/bugzilla/show_bug.cgi?id=4578">Redhat bug 4578</a>. +<!-- <h3>Update</h3> +<ul> +</ul> +--> + +</body> +</html> diff --git a/tools/jprof/bfd.cpp b/tools/jprof/bfd.cpp new file mode 100644 index 0000000000..6be8fde760 --- /dev/null +++ b/tools/jprof/bfd.cpp @@ -0,0 +1,221 @@ +// vim:ts=8:sw=2:et: +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "leaky.h" + +#ifdef USE_BFD +# include <stdio.h> +# include <string.h> +# include <fcntl.h> +# include <unistd.h> +# include <libgen.h> +# include <bfd.h> +# include <cxxabi.h> + +static bfd* try_debug_file(const char* filename, unsigned long crc32) { + int fd = open(filename, O_RDONLY); + if (fd < 0) return nullptr; + + unsigned char buf[4 * 1024]; + unsigned long crc = 0; + + while (1) { + ssize_t count = read(fd, buf, sizeof(buf)); + if (count <= 0) break; + + crc = bfd_calc_gnu_debuglink_crc32(crc, buf, count); + } + + close(fd); + + if (crc != crc32) return nullptr; + + bfd* object = bfd_openr(filename, nullptr); + if (!bfd_check_format(object, bfd_object)) { + bfd_close(object); + return nullptr; + } + + return object; +} + +static bfd* find_debug_file(bfd* lib, const char* aFileName) { + // check for a separate debug file with symbols + asection* sect = bfd_get_section_by_name(lib, ".gnu_debuglink"); + + if (!sect) return nullptr; + + bfd_size_type debuglinkSize = bfd_section_size(objfile->obfd, sect); + + char* debuglink = new char[debuglinkSize]; + bfd_get_section_contents(lib, sect, debuglink, 0, debuglinkSize); + + // crc checksum is aligned to 4 bytes, and after the NUL. + int crc_offset = (int(strlen(debuglink)) & ~3) + 4; + unsigned long crc32 = bfd_get_32(lib, debuglink + crc_offset); + + // directory component + char* dirbuf = strdup(aFileName); + const char* dir = dirname(dirbuf); + + static const char debug_subdir[] = ".debug"; + // This is gdb's default global debugging info directory, but gdb can + // be instructed to use a different directory. + static const char global_debug_dir[] = "/usr/lib/debug"; + + char* filename = + new char[strlen(global_debug_dir) + strlen(dir) + crc_offset + 3]; + + // /path/debuglink + sprintf(filename, "%s/%s", dir, debuglink); + bfd* debugFile = try_debug_file(filename, crc32); + if (!debugFile) { + // /path/.debug/debuglink + sprintf(filename, "%s/%s/%s", dir, debug_subdir, debuglink); + debugFile = try_debug_file(filename, crc32); + if (!debugFile) { + // /usr/lib/debug/path/debuglink + sprintf(filename, "%s/%s/%s", global_debug_dir, dir, debuglink); + debugFile = try_debug_file(filename, crc32); + } + } + + delete[] filename; + free(dirbuf); + delete[] debuglink; + + return debugFile; +} + +// Use an indirect array to avoid copying tons of objects +Symbol** leaky::ExtendSymbols(int num) { + long n = numExternalSymbols + num; + + externalSymbols = (Symbol**)realloc(externalSymbols, + (size_t)(sizeof(externalSymbols[0]) * n)); + Symbol* new_array = new Symbol[n]; + for (int i = 0; i < num; i++) { + externalSymbols[i + numExternalSymbols] = &new_array[i]; + } + lastSymbol = externalSymbols + n; + Symbol** sp = externalSymbols + numExternalSymbols; + numExternalSymbols = n; + return sp; +} + +# define NEXT_SYMBOL \ + do { \ + sp++; \ + if (sp >= lastSymbol) { \ + sp = ExtendSymbols(16384); \ + } \ + } while (0) + +void leaky::ReadSymbols(const char* aFileName, u_long aBaseAddress) { + int initialSymbols = usefulSymbols; + if (nullptr == externalSymbols) { + externalSymbols = (Symbol**)calloc(sizeof(Symbol*), 10000); + Symbol* new_array = new Symbol[10000]; + for (int i = 0; i < 10000; i++) { + externalSymbols[i] = &new_array[i]; + } + numExternalSymbols = 10000; + } + Symbol** sp = externalSymbols + usefulSymbols; + lastSymbol = externalSymbols + numExternalSymbols; + + // Create a dummy symbol for the library so, if it doesn't have any + // symbols, we show it by library. + (*sp)->Init(aFileName, aBaseAddress); + NEXT_SYMBOL; + + bfd_boolean kDynamic = (bfd_boolean) false; + + static int firstTime = 1; + if (firstTime) { + firstTime = 0; + bfd_init(); + } + + bfd* lib = bfd_openr(aFileName, nullptr); + if (nullptr == lib) { + return; + } + if (!bfd_check_format(lib, bfd_object)) { + bfd_close(lib); + return; + } + + bfd* symbolFile = find_debug_file(lib, aFileName); + + // read mini symbols + PTR minisyms; + unsigned int size; + long symcount = 0; + + if (symbolFile) { + symcount = bfd_read_minisymbols(symbolFile, kDynamic, &minisyms, &size); + if (symcount == 0) { + bfd_close(symbolFile); + } else { + bfd_close(lib); + } + } + if (symcount == 0) { + symcount = bfd_read_minisymbols(lib, kDynamic, &minisyms, &size); + if (symcount == 0) { + // symtab is empty; try dynamic symbols + kDynamic = (bfd_boolean) true; + symcount = bfd_read_minisymbols(lib, kDynamic, &minisyms, &size); + } + symbolFile = lib; + } + + asymbol* store; + store = bfd_make_empty_symbol(symbolFile); + + // Scan symbols + size_t demangle_buffer_size = 128; + char* demangle_buffer = (char*)malloc(demangle_buffer_size); + bfd_byte* from = (bfd_byte*)minisyms; + bfd_byte* fromend = from + symcount * size; + for (; from < fromend; from += size) { + asymbol* sym; + sym = + bfd_minisymbol_to_symbol(symbolFile, kDynamic, (const PTR)from, store); + + symbol_info syminfo; + bfd_get_symbol_info(symbolFile, sym, &syminfo); + + // if ((syminfo.type == 'T') || (syminfo.type == 't')) { + const char* nm = bfd_asymbol_name(sym); + if (nm && nm[0]) { + char* dnm = nullptr; + if (strncmp("__thunk", nm, 7)) { + dnm = + abi::__cxa_demangle(nm, demangle_buffer, &demangle_buffer_size, 0); + if (dnm) { + demangle_buffer = dnm; + } + } + (*sp)->Init(dnm ? dnm : nm, syminfo.value + aBaseAddress); + NEXT_SYMBOL; + } + // } + } + + free(demangle_buffer); + demangle_buffer = nullptr; + + bfd_close(symbolFile); + + int interesting = sp - externalSymbols; + if (!quiet) { + printf("%s provided %d symbols\n", aFileName, interesting - initialSymbols); + } + usefulSymbols = interesting; +} + +#endif /* USE_BFD */ diff --git a/tools/jprof/coff.cpp b/tools/jprof/coff.cpp new file mode 100644 index 0000000000..0efa83960c --- /dev/null +++ b/tools/jprof/coff.cpp @@ -0,0 +1,93 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "leaky.h" + +#ifdef USE_COFF + +# define LANGUAGE_C +# include <sym.h> +# include <cmplrs/stsupport.h> +# include <symconst.h> +# include <filehdr.h> +# include <ldfcn.h> +# include <string.h> +# include <stdlib.h> + +# ifdef IRIX4 +extern "C" { +extern char* demangle(char const* in); +}; +# else +# include <dem.h> +# endif + +static char* Demangle(char* rawName) { +# ifdef IRIX4 + return strdup(demangle(rawName)); +# else + char namebuf[4000]; + demangle(rawName, namebuf); + return strdup(namebuf); +# endif +} + +void leaky::readSymbols(const char* fileName) { + LDFILE* ldptr; + + ldptr = ldopen(fileName, nullptr); + if (!ldptr) { + fprintf(stderr, "%s: unable to open \"%s\"\n", applicationName, fileName); + exit(-1); + } + if (PSYMTAB(ldptr) == 0) { + fprintf(stderr, "%s: \"%s\": has no symbol table\n", applicationName, + fileName); + exit(-1); + } + + long isymMax = SYMHEADER(ldptr).isymMax; + long iextMax = SYMHEADER(ldptr).iextMax; + long iMax = isymMax + iextMax; + + long alloced = 10000; + Symbol* syms = (Symbol*)malloc(sizeof(Symbol) * 10000); + Symbol* sp = syms; + Symbol* last = syms + alloced; + SYMR symr; + + for (long isym = 0; isym < iMax; isym++) { + if (ldtbread(ldptr, isym, &symr) != SUCCESS) { + fprintf(stderr, "%s: can't read symbol #%d\n", applicationName, isym); + exit(-1); + } + if (isym < isymMax) { + if ((symr.st == stStaticProc) || + ((symr.st == stProc) && + ((symr.sc == scText) || (symr.sc == scAbs))) || + ((symr.st == stBlock) && (symr.sc == scText))) { + // Text symbol. Set name field to point to the symbol name + sp->name = Demangle(ldgetname(ldptr, &symr)); + sp->address = symr.value; + sp++; + if (sp >= last) { + long n = alloced + 10000; + syms = (Symbol*)realloc(syms, (size_t)(sizeof(Symbol) * n)); + last = syms + n; + sp = syms + alloced; + alloced = n; + } + } + } + } + + int interesting = sp - syms; + if (!quiet) { + printf("Total of %d symbols\n", interesting); + } + usefulSymbols = interesting; + externalSymbols = syms; +} + +#endif /* USE_COFF */ diff --git a/tools/jprof/elf.cpp b/tools/jprof/elf.cpp new file mode 100644 index 0000000000..c2e00f60da --- /dev/null +++ b/tools/jprof/elf.cpp @@ -0,0 +1,128 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "leaky.h" + +#ifdef USE_ELF + +# include "leaky.h" +# include <stdio.h> +# include <malloc.h> +# include <libelf/libelf.h> +# include <unistd.h> +# include <fcntl.h> +# include <string.h> + +void leaky::readSymbols(const char* fileName) { + int fd = ::open(fileName, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s: unable to open \"%s\"\n", applicationName, fileName); + exit(-1); + } + + elf_version(EV_CURRENT); + Elf* elf = elf_begin(fd, ELF_C_READ, 0); + if (!elf) { + fprintf(stderr, "%s: \"%s\": has no symbol table\n", applicationName, + fileName); + exit(-1); + } + + long alloced = 10000; + Symbol* syms = (Symbol*)malloc(sizeof(Symbol) * 10000); + Symbol* sp = syms; + Symbol* last = syms + alloced; + + // Get each of the relevant sections and add them to the list of + // symbols. + Elf32_Ehdr* ehdr = elf32_getehdr(elf); + if (!ehdr) { + fprintf(stderr, "%s: elf library lossage\n", applicationName); + exit(-1); + } +# if 0 + Elf32_Half ndx = ehdr->e_shstrndx; +# endif + + Elf_Scn* scn = 0; + int strtabndx = -1; + for (int i = 1; (scn = elf_nextscn(elf, scn)) != 0; i++) { + Elf32_Shdr* shdr = elf32_getshdr(scn); +# if 0 + char *name = elf_strptr(elf, ndx, (size_t) shdr->sh_name); + printf("Section %s (%d 0x%x)\n", name ? name : "(null)", + shdr->sh_type, shdr->sh_type); +# endif + if (shdr->sh_type == SHT_STRTAB) { + /* We assume here that string tables preceed symbol tables... */ + strtabndx = i; + continue; + } +# if 0 + if (shdr->sh_type == SHT_DYNAMIC) { + /* Dynamic */ + Elf_Data *data = elf_getdata(scn, 0); + if (!data || !data->d_size) { + printf("No data..."); + continue; + } + + Elf32_Dyn *dyn = (Elf32_Dyn*) data->d_buf; + Elf32_Dyn *lastdyn = + (Elf32_Dyn*) ((char*) data->d_buf + data->d_size); + for (; dyn < lastdyn; dyn++) { + printf("tag=%d value=0x%x\n", dyn->d_tag, dyn->d_un.d_val); + } + } else +# endif + if ((shdr->sh_type == SHT_SYMTAB) || (shdr->sh_type == SHT_DYNSYM)) { + /* Symbol table */ + Elf_Data* data = elf_getdata(scn, 0); + if (!data || !data->d_size) { + printf("No data..."); + continue; + } + + /* In theory we now have the symbols... */ + Elf32_Sym* esym = (Elf32_Sym*)data->d_buf; + Elf32_Sym* lastsym = (Elf32_Sym*)((char*)data->d_buf + data->d_size); + for (; esym < lastsym; esym++) { +# if 0 + char *nm = elf_strptr(elf, strtabndx, (size_t)esym->st_name); + printf("%20s 0x%08x %02x %02x\n", + nm, esym->st_value, ELF32_ST_BIND(esym->st_info), + ELF32_ST_TYPE(esym->st_info)); +# endif + if ((esym->st_value == 0) || + (ELF32_ST_BIND(esym->st_info) == STB_WEAK) || + (ELF32_ST_BIND(esym->st_info) == STB_NUM) || + (ELF32_ST_TYPE(esym->st_info) != STT_FUNC)) { + continue; + } +# if 1 + char* nm = elf_strptr(elf, strtabndx, (size_t)esym->st_name); +# endif + sp->name = nm ? strdup(nm) : "(no name)"; + sp->address = esym->st_value; + sp++; + if (sp >= last) { + long n = alloced + 10000; + syms = (Symbol*)realloc(syms, (size_t)(sizeof(Symbol) * n)); + last = syms + n; + sp = syms + alloced; + alloced = n; + } + } + } + } + + int interesting = sp - syms; + if (!quiet) { + printf("Total of %d symbols\n", interesting); + } + usefulSymbols = interesting; + externalSymbols = syms; +} + +#endif /* USE_ELF */ diff --git a/tools/jprof/intcnt.cpp b/tools/jprof/intcnt.cpp new file mode 100644 index 0000000000..a87b6ccf74 --- /dev/null +++ b/tools/jprof/intcnt.cpp @@ -0,0 +1,69 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "intcnt.h" + +IntCount::IntCount() : numInts(0), iPair(nullptr) {} +IntCount::~IntCount() { delete[] iPair; } +int IntCount::getSize() { return numInts; } +int IntCount::getCount(int pos) { return iPair[pos].cnt; } +int IntCount::getIndex(int pos) { return iPair[pos].idx; } + +void IntCount::clear() { + delete[] iPair; + iPair = new IntPair[0]; + numInts = 0; +} + +int IntCount::countAdd(int index, int increment) { + if (numInts) { + // Do a binary search to find the element + int divPoint = 0; + + if (index > iPair[numInts - 1].idx) { + divPoint = numInts; + } else if (index < iPair[0].idx) { + divPoint = 0; + } else { + int low = 0, high = numInts - 1; + int mid = (low + high) / 2; + while (1) { + mid = (low + high) / 2; + + if (index < iPair[mid].idx) { + high = mid; + } else if (index > iPair[mid].idx) { + if (mid < numInts - 1 && index < iPair[mid + 1].idx) { + divPoint = mid + 1; + break; + } else { + low = mid + 1; + } + } else if (index == iPair[mid].idx) { + return iPair[mid].cnt += increment; + } + } + } + + int i; + IntPair* tpair = new IntPair[numInts + 1]; + for (i = 0; i < divPoint; i++) { + tpair[i] = iPair[i]; + } + for (i = divPoint; i < numInts; i++) { + tpair[i + 1] = iPair[i]; + } + ++numInts; + delete[] iPair; + iPair = tpair; + iPair[divPoint].idx = index; + iPair[divPoint].cnt = increment; + return increment; + } else { + iPair = new IntPair[1]; + numInts = 1; + iPair[0].idx = index; + return iPair[0].cnt = increment; + } +} diff --git a/tools/jprof/intcnt.h b/tools/jprof/intcnt.h new file mode 100644 index 0000000000..2cf9ec1ff1 --- /dev/null +++ b/tools/jprof/intcnt.h @@ -0,0 +1,39 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef INTCNT_H +#define INTCNT_H + +class IntCount { + public: + IntCount(); + ~IntCount(); + void clear(); + int countAdd(int index, int increment = 1); + int countGet(int index); + int getSize(); + int getCount(int pos); + int getIndex(int pos); + + IntCount(const IntCount& old) { + numInts = old.numInts; + if (numInts > 0) { + iPair = new IntPair[numInts]; + for (int i = 0; i < numInts; i++) { + iPair[i] = old.iPair[i]; + } + } else { + iPair = nullptr; + } + } + + private: + int numInts; + struct IntPair { + int idx; + int cnt; + }* iPair; +}; + +#endif diff --git a/tools/jprof/jprofsig b/tools/jprof/jprofsig new file mode 100755 index 0000000000..02226fc4b6 --- /dev/null +++ b/tools/jprof/jprofsig @@ -0,0 +1,46 @@ +#!/bin/sh +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# +# Find Mozilla PID and send it a signal, to be used +# with the jprof tool. +# + +jpsignal_usage() { + echo "Usage: jprofsig [start|stop]" + exit 1 +} + +if [ $# != 1 ]; then + echo "Wrong number of arguments." + jpsignal_usage +fi + +jpsignal_arg="$1" + +# Find & print mozilla PID +tmpmoz=`ps aux | grep mozilla-bin | head -1 | awk '{ print $2 }'` +echo "Mozilla PID = $tmpmoz" + +# See how we were called. +case "$jpsignal_arg" in + start) + if [ "$JP_REALTIME" = 1 ]; then + kill -ALRM $tmpmoz + else + # Normal, non-realtime mode. + kill -PROF $tmpmoz + fi + ;; + stop) + kill -USR1 $tmpmoz + ;; + *) + jpsignal_usage + exit 1 +esac + +exit 0 diff --git a/tools/jprof/leaky.cpp b/tools/jprof/leaky.cpp new file mode 100644 index 0000000000..91e9d8aa82 --- /dev/null +++ b/tools/jprof/leaky.cpp @@ -0,0 +1,827 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "leaky.h" +#include "intcnt.h" + +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#ifndef NTO +# include <getopt.h> +#endif +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> + +#ifdef NTO +# include <mem.h> +#endif + +#ifndef FALSE +# define FALSE 0 +#endif +#ifndef TRUE +# define TRUE 1 +#endif + +static const u_int DefaultBuckets = 10007; // arbitrary, but prime +static const u_int MaxBuckets = 1000003; // arbitrary, but prime + +//---------------------------------------------------------------------- + +int main(int argc, char** argv) { + leaky* l = new leaky; + + l->initialize(argc, argv); + l->outputfd = stdout; + + for (int i = 0; i < l->numLogFiles; i++) { + if (l->output_dir || l->numLogFiles > 1) { + char name[2048]; // XXX fix + if (l->output_dir) + snprintf(name, sizeof(name), "%s/%s.html", l->output_dir, + argv[l->logFileIndex + i]); + else + snprintf(name, sizeof(name), "%s.html", argv[l->logFileIndex + i]); + + fprintf(stderr, "opening %s\n", name); + l->outputfd = fopen(name, "w"); + // if an error we won't process the file + } + if (l->outputfd) { // paranoia + l->open(argv[l->logFileIndex + i]); + + if (l->outputfd != stderr) { + fclose(l->outputfd); + l->outputfd = nullptr; + } + } + } + + return 0; +} + +char* htmlify(const char* in) { + const char* p = in; + char *out, *q; + int n = 0; + size_t newlen; + + // Count the number of '<' and '>' in the input. + while ((p = strpbrk(p, "<>"))) { + ++n; + ++p; + } + + // Knowing the number of '<' and '>', we can calculate the space + // needed for the output string. + newlen = strlen(in) + n * 3 + 1; + out = new char[newlen]; + + // Copy the input to the output, with substitutions. + p = in; + q = out; + do { + if (*p == '<') { + strcpy(q, "<"); + q += 4; + } else if (*p == '>') { + strcpy(q, ">"); + q += 4; + } else { + *q++ = *p; + } + p++; + } while (*p); + *q = '\0'; + + return out; +} + +leaky::leaky() { + applicationName = nullptr; + progFile = nullptr; + + quiet = true; + showAddress = false; + showThreads = false; + stackDepth = 100000; + onlyThread = 0; + cleo = false; + + mappedLogFile = -1; + firstLogEntry = lastLogEntry = 0; + + sfd = -1; + externalSymbols = 0; + usefulSymbols = 0; + numExternalSymbols = 0; + lowestSymbolAddr = 0; + highestSymbolAddr = 0; + + loadMap = nullptr; + + collect_last = false; + collect_start = -1; + collect_end = -1; +} + +leaky::~leaky() {} + +void leaky::usageError() { + fprintf(stderr, + "Usage: %s [-v] [-t] [-e exclude] [-i include] [-s stackdepth] " + "[--last] [--all] [--start n [--end m]] [--cleo] [--output-dir dir] " + "prog log [log2 ...]\n", + (char*)applicationName); + fprintf( + stderr, + "\t-v: verbose\n" + "\t-t | --threads: split threads\n" + "\t--only-thread n: only profile thread N\n" + "\t-i include-id: stack must include specified id\n" + "\t-e exclude-id: stack must NOT include specified id\n" + "\t-s stackdepth: Limit depth looked at from captured stack frames\n" + "\t--last: only profile the last capture section\n" + "\t--start n [--end m]: profile n to m (or end) capture sections\n" + "\t--cleo: format output for 'cleopatra' display\n" + "\t--output-dir dir: write output files to dir\n" + "\tIf there's one log, output goes to stdout unless --output-dir is set\n" + "\tIf there are more than one log, output files will be named with .html " + "added\n"); + exit(-1); +} + +static struct option longopts[] = { + {"threads", 0, nullptr, 't'}, {"only-thread", 1, nullptr, 'T'}, + {"last", 0, nullptr, 'l'}, {"start", 1, nullptr, 'x'}, + {"end", 1, nullptr, 'n'}, {"cleo", 0, nullptr, 'c'}, + {"output-dir", 1, nullptr, 'd'}, {nullptr, 0, nullptr, 0}, +}; + +void leaky::initialize(int argc, char** argv) { + applicationName = argv[0]; + applicationName = strrchr(applicationName, '/'); + if (!applicationName) { + applicationName = argv[0]; + } else { + applicationName++; + } + + int arg; + int errflg = 0; + int longindex = 0; + + onlyThread = 0; + output_dir = nullptr; + cleo = false; + + // XXX tons of cruft here left over from tracemalloc + // XXX The -- options shouldn't need short versions, or they should be + // documented + while (((arg = getopt_long(argc, argv, "adEe:gh:i:r:Rs:tT:qvx:ln:", longopts, + &longindex)) != -1)) { + switch (arg) { + case '?': + default: + fprintf(stderr, "error: unknown option %c\n", optopt); + errflg++; + break; + case 'a': + break; + case 'A': // not implemented + showAddress = true; + break; + case 'c': + cleo = true; + break; + case 'd': + output_dir = optarg; // reference to an argv pointer + break; + case 'R': + break; + case 'e': + exclusions.add(optarg); + break; + case 'g': + break; + case 'r': // not implemented + roots.add(optarg); + if (!includes.IsEmpty()) { + errflg++; + } + break; + case 'i': + includes.add(optarg); + if (!roots.IsEmpty()) { + errflg++; + } + break; + case 'h': + break; + case 's': + stackDepth = atoi(optarg); + if (stackDepth < 2) { + stackDepth = 2; + } + break; + case 'x': + // --start + collect_start = atoi(optarg); + break; + case 'n': + // --end + collect_end = atoi(optarg); + break; + case 'l': + // --last + collect_last = true; + break; + case 'q': + break; + case 'v': + quiet = !quiet; + break; + case 't': + showThreads = true; + break; + case 'T': + showThreads = true; + onlyThread = atoi(optarg); + break; + } + } + if (errflg || ((argc - optind) < 2)) { + usageError(); + } + progFile = argv[optind++]; + logFileIndex = optind; + numLogFiles = argc - optind; + if (!quiet) fprintf(stderr, "numlogfiles = %d\n", numLogFiles); +} + +static void* mapFile(int fd, u_int flags, off_t* sz) { + struct stat sb; + if (fstat(fd, &sb) < 0) { + perror("fstat"); + exit(-1); + } + void* base = mmap(0, (int)sb.st_size, flags, MAP_PRIVATE, fd, 0); + if (!base) { + perror("mmap"); + exit(-1); + } + *sz = sb.st_size; + return base; +} + +void leaky::LoadMap() { + malloc_map_entry mme; + char name[1000]; + + if (!loadMap) { + // all files use the same map + int fd = ::open(M_MAPFILE, O_RDONLY); + if (fd < 0) { + perror("open: " M_MAPFILE); + exit(-1); + } + for (;;) { + int nb = read(fd, &mme, sizeof(mme)); + if (nb != sizeof(mme)) break; + nb = read(fd, name, mme.nameLen); + if (nb != (int)mme.nameLen) break; + name[mme.nameLen] = 0; + if (!quiet) { + fprintf(stderr, "%s @ %lx\n", name, mme.address); + } + + LoadMapEntry* lme = new LoadMapEntry; + lme->address = mme.address; + lme->name = strdup(name); + lme->next = loadMap; + loadMap = lme; + } + close(fd); + } +} + +void leaky::open(char* logFile) { + int threadArray[100]; // should auto-expand + int last_thread = -1; + int numThreads = 0; + int section = -1; + bool collecting = false; + + LoadMap(); + + setupSymbols(progFile); + + // open up the log file + if (mappedLogFile) ::close(mappedLogFile); + + mappedLogFile = ::open(logFile, O_RDONLY); + if (mappedLogFile < 0) { + perror("open"); + exit(-1); + } + off_t size; + firstLogEntry = (malloc_log_entry*)mapFile(mappedLogFile, PROT_READ, &size); + lastLogEntry = (malloc_log_entry*)((char*)firstLogEntry + size); + + if (!collect_last || collect_start < 0) { + collecting = true; + } + + // First, restrict it to the capture sections specified (all, last, start/end) + // This loop walks through all the call stacks we recorded + for (malloc_log_entry* lep = firstLogEntry; lep < lastLogEntry; + lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) { + if (lep->flags & JP_FIRST_AFTER_PAUSE) { + section++; + if (collect_last) { + firstLogEntry = lep; + numThreads = 0; + collecting = true; + } + if (collect_start == section) { + collecting = true; + firstLogEntry = lep; + } + if (collect_end == section) { + collecting = false; + lastLogEntry = lep; + } + if (!quiet) + fprintf(stderr, "New section %d: first=%p, last=%p, collecting=%d\n", + section, (void*)firstLogEntry, (void*)lastLogEntry, collecting); + } + + // Capture thread info at the same time + + // Find all the threads captured + + // pthread/linux docs say the signal can be delivered to any thread in + // the process. In practice, it appears in Linux that it's always + // delivered to the thread that called setitimer(), and each thread can + // have a separate itimer. There's a support library for gprof that + // overlays pthread_create() to set timers in any threads you spawn. + if (showThreads && collecting) { + if (lep->thread != last_thread) { + int i; + for (i = 0; i < numThreads; i++) { + if (lep->thread == threadArray[i]) break; + } + if (i == numThreads && + i < (int)(sizeof(threadArray) / sizeof(threadArray[0]))) { + threadArray[i] = lep->thread; + numThreads++; + if (!quiet) fprintf(stderr, "new thread %d\n", lep->thread); + } + } + } + } + if (!quiet) + fprintf(stderr, + "Done collecting: sections %d: first=%p, last=%p, numThreads=%d\n", + section, (void*)firstLogEntry, (void*)lastLogEntry, numThreads); + + if (!cleo) { + fprintf(outputfd, + "<html><head><title>Jprof Profile Report</title></head><body>\n"); + fprintf(outputfd, "<h1><center>Jprof Profile Report</center></h1>\n"); + } + + if (showThreads) { + fprintf(stderr, "Num threads %d\n", numThreads); + + if (!cleo) { + fprintf(outputfd, "<hr>Threads:<p><pre>\n"); + for (int i = 0; i < numThreads; i++) { + fprintf(outputfd, " <a href=\"#thread_%d\">%d</a> ", threadArray[i], + threadArray[i]); + if ((i + 1) % 10 == 0) fprintf(outputfd, "<br>\n"); + } + fprintf(outputfd, "</pre>"); + } + + for (int i = 0; i < numThreads; i++) { + if (!onlyThread || onlyThread == threadArray[i]) analyze(threadArray[i]); + } + } else { + analyze(0); + } + + if (!cleo) fprintf(outputfd, "</pre></body></html>\n"); +} + +//---------------------------------------------------------------------- + +static int symbolOrder(void const* a, void const* b) { + Symbol const** ap = (Symbol const**)a; + Symbol const** bp = (Symbol const**)b; + return (*ap)->address == (*bp)->address + ? 0 + : ((*ap)->address > (*bp)->address ? 1 : -1); +} + +void leaky::ReadSharedLibrarySymbols() { + LoadMapEntry* lme = loadMap; + while (nullptr != lme) { + ReadSymbols(lme->name, lme->address); + lme = lme->next; + } +} + +void leaky::setupSymbols(const char* fileName) { + if (usefulSymbols == 0) { + // only read once! + + // Read in symbols from the program + ReadSymbols(fileName, 0); + + // Read in symbols from the .so's + ReadSharedLibrarySymbols(); + + if (!quiet) { + fprintf(stderr, "A total of %d symbols were loaded\n", usefulSymbols); + } + + // Now sort them + qsort(externalSymbols, usefulSymbols, sizeof(Symbol*), symbolOrder); + lowestSymbolAddr = externalSymbols[0]->address; + highestSymbolAddr = externalSymbols[usefulSymbols - 1]->address; + } +} + +// Binary search the table, looking for a symbol that covers this +// address. +int leaky::findSymbolIndex(u_long addr) { + u_int base = 0; + u_int limit = usefulSymbols - 1; + Symbol** end = &externalSymbols[limit]; + while (base <= limit) { + u_int midPoint = (base + limit) >> 1; + Symbol** sp = &externalSymbols[midPoint]; + if (addr < (*sp)->address) { + if (midPoint == 0) { + return -1; + } + limit = midPoint - 1; + } else { + if (sp + 1 < end) { + if (addr < (*(sp + 1))->address) { + return midPoint; + } + } else { + return midPoint; + } + base = midPoint + 1; + } + } + return -1; +} + +Symbol* leaky::findSymbol(u_long addr) { + int idx = findSymbolIndex(addr); + + if (idx < 0) { + return nullptr; + } else { + return externalSymbols[idx]; + } +} + +//---------------------------------------------------------------------- + +bool leaky::excluded(malloc_log_entry* lep) { + if (exclusions.IsEmpty()) { + return false; + } + + char** pcp = &lep->pcs[0]; + u_int n = lep->numpcs; + for (u_int i = 0; i < n; i++, pcp++) { + Symbol* sp = findSymbol((u_long)*pcp); + if (sp && exclusions.contains(sp->name)) { + return true; + } + } + return false; +} + +bool leaky::included(malloc_log_entry* lep) { + if (includes.IsEmpty()) { + return true; + } + + char** pcp = &lep->pcs[0]; + u_int n = lep->numpcs; + for (u_int i = 0; i < n; i++, pcp++) { + Symbol* sp = findSymbol((u_long)*pcp); + if (sp && includes.contains(sp->name)) { + return true; + } + } + return false; +} + +//---------------------------------------------------------------------- + +void leaky::displayStackTrace(FILE* out, malloc_log_entry* lep) { + char** pcp = &lep->pcs[0]; + u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth; + for (u_int i = 0; i < n; i++, pcp++) { + u_long addr = (u_long)*pcp; + Symbol* sp = findSymbol(addr); + if (sp) { + fputs(sp->name, out); + if (showAddress) { + fprintf(out, "[%p]", (char*)addr); + } + } else { + fprintf(out, "<%p>", (char*)addr); + } + fputc(' ', out); + } + fputc('\n', out); +} + +void leaky::dumpEntryToLog(malloc_log_entry* lep) { + printf("%ld\t", lep->delTime); + printf(" --> "); + displayStackTrace(outputfd, lep); +} + +void leaky::generateReportHTML(FILE* fp, int* countArray, int count, + int thread) { + fprintf(fp, "<center>"); + if (showThreads) { + fprintf(fp, "<hr><A NAME=thread_%d><b>Thread: %d</b></A><p>", thread, + thread); + } + fprintf( + fp, + "<A href=#flat_%d>flat</A><b> | </b><A href=#hier_%d>hierarchical</A>", + thread, thread); + fprintf(fp, "</center><P><P><P>\n"); + + int totalTimerHits = count; + int* rankingTable = new int[usefulSymbols]; + + for (int cnt = usefulSymbols; --cnt >= 0; rankingTable[cnt] = cnt) + ; + + // Drat. I would use ::qsort() but I would need a global variable and my + // intro-pascal professor threatened to flunk anyone who used globals. + // She damaged me for life :-) (That was 1986. See how much influence + // she had. I don't remember her name but I always feel guilty about globals) + + // Shell Sort. 581130733 is the max 31 bit value of h = 3h+1 + int mx, i, h; + for (mx = usefulSymbols / 9, h = 581130733; h > 0; h /= 3) { + if (h < mx) { + for (i = h - 1; i < usefulSymbols; i++) { + int j, tmp = rankingTable[i], val = countArray[tmp]; + for (j = i; (j >= h) && (countArray[rankingTable[j - h]] < val); + j -= h) { + rankingTable[j] = rankingTable[j - h]; + } + rankingTable[j] = tmp; + } + } + } + + // Ok, We are sorted now. Let's go through the table until we get to + // functions that were never called. Right now we don't do much inside + // this loop. Later we can get callers and callees into it like gprof + // does + fprintf(fp, + "<h2><A NAME=hier_%d></A><center><a " + "href=\"http://searchfox.org/mozilla-central/source/tools/jprof/" + "README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n", + thread); + fprintf(fp, "<pre>\n"); + fprintf(fp, "%6s %6s %4s %s\n", "index", "Count", "Hits", + "Function Name"); + + for (i = 0; i < usefulSymbols && countArray[rankingTable[i]] > 0; i++) { + Symbol** sp = &externalSymbols[rankingTable[i]]; + + (*sp)->cntP.printReport(fp, this, rankingTable[i], totalTimerHits); + + char* symname = htmlify((*sp)->name); + fprintf(fp, + "%6d %6d (%3.1f%%)%s <a name=%d>%8d (%3.1f%%)</a>%s <b>%s</b>\n", + rankingTable[i], (*sp)->timerHit, + ((*sp)->timerHit * 1000 / totalTimerHits) / 10.0, + ((*sp)->timerHit * 1000 / totalTimerHits) / 10.0 >= 10.0 ? "" : " ", + rankingTable[i], countArray[rankingTable[i]], + (countArray[rankingTable[i]] * 1000 / totalTimerHits) / 10.0, + (countArray[rankingTable[i]] * 1000 / totalTimerHits) / 10.0 >= 10.0 + ? "" + : " ", + symname); + delete[] symname; + + (*sp)->cntC.printReport(fp, this, rankingTable[i], totalTimerHits); + + fprintf(fp, "<hr>\n"); + } + fprintf(fp, "</pre>\n"); + + // OK, Now we want to print the flat profile. To do this we resort on + // the hit count. + + // Cut-N-Paste Shell sort from above. The Ranking Table has already been + // populated, so we do not have to reinitialize it. + for (mx = usefulSymbols / 9, h = 581130733; h > 0; h /= 3) { + if (h < mx) { + for (i = h - 1; i < usefulSymbols; i++) { + int j, tmp = rankingTable[i], val = externalSymbols[tmp]->timerHit; + for (j = i; + (j >= h) && (externalSymbols[rankingTable[j - h]]->timerHit < val); + j -= h) { + rankingTable[j] = rankingTable[j - h]; + } + rankingTable[j] = tmp; + } + } + } + + // Pre-count up total counter hits, to get a percentage. + // I wanted the total before walking the list, if this + // double-pass over externalSymbols gets slow we can + // do single-pass and print this out after the loop finishes. + totalTimerHits = 0; + for (i = 0; + i < usefulSymbols && externalSymbols[rankingTable[i]]->timerHit > 0; + i++) { + Symbol** sp = &externalSymbols[rankingTable[i]]; + totalTimerHits += (*sp)->timerHit; + } + if (totalTimerHits == 0) totalTimerHits = 1; + + if (totalTimerHits != count) + fprintf(stderr, "Hit count mismatch: count=%d; totalTimerHits=%d", count, + totalTimerHits); + + fprintf(fp, + "<h2><A NAME=flat_%d></A><center><a " + "href=\"http://searchfox.org/mozilla-central/source/tools/jprof/" + "README.html#flat\">Flat Profile</a></center></h2><br>\n", + thread); + fprintf(fp, "<pre>\n"); + + fprintf(fp, "Total hit count: %d\n", totalTimerHits); + fprintf(fp, "Count %%Total Function Name\n"); + // Now loop for as long as we have timer hits + for (i = 0; + i < usefulSymbols && externalSymbols[rankingTable[i]]->timerHit > 0; + i++) { + Symbol** sp = &externalSymbols[rankingTable[i]]; + + char* symname = htmlify((*sp)->name); + fprintf(fp, "<a href=\"#%d\">%3d %-2.1f %s</a>\n", rankingTable[i], + (*sp)->timerHit, + ((float)(*sp)->timerHit / (float)totalTimerHits) * 100.0, symname); + delete[] symname; + } +} + +void leaky::analyze(int thread) { + int* countArray = new int[usefulSymbols]; + int* flagArray = new int[usefulSymbols]; + + // Zero our function call counter + memset(countArray, 0, sizeof(countArray[0]) * usefulSymbols); + + // reset hit counts + for (int i = 0; i < usefulSymbols; i++) { + externalSymbols[i]->timerHit = 0; + externalSymbols[i]->regClear(); + } + + // The flag array is used to prevent counting symbols multiple times + // if functions are called recursively. In order to keep from having + // to zero it on each pass through the loop, we mark it with the value + // of stacks on each trip through the loop. This means we can determine + // if we have seen this symbol for this stack trace w/o having to reset + // from the prior stacktrace. + memset(flagArray, -1, sizeof(flagArray[0]) * usefulSymbols); + + if (cleo) fprintf(outputfd, "m-Start\n"); + + // This loop walks through all the call stacks we recorded + // --last, --start and --end can restrict it, as can excludes/includes + stacks = 0; + for (malloc_log_entry* lep = firstLogEntry; lep < lastLogEntry; + lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) { + if ((thread != 0 && lep->thread != thread) || excluded(lep) || + !included(lep)) { + continue; + } + + ++stacks; // How many stack frames did we collect + + u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth; + char** pcp = &lep->pcs[n - 1]; + int idx = -1, parrentIdx = -1; // Init idx incase n==0 + if (cleo) { + // This loop walks through every symbol in the call stack. By walking it + // backwards we know who called the function when we get there. + char type = 's'; + for (int i = n - 1; i >= 0; --i, --pcp) { + idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp)); + + if (idx >= 0) { + // Skip over bogus __restore_rt frames that realtime profiling + // can introduce. + if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) { + --pcp; + --i; + idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp)); + if (idx < 0) { + continue; + } + } + Symbol** sp = &externalSymbols[idx]; + char* symname = htmlify((*sp)->name); + fprintf(outputfd, "%c-%s\n", type, symname); + delete[] symname; + } + // else can't find symbol - ignore + type = 'c'; + } + } else { + // This loop walks through every symbol in the call stack. By walking it + // backwards we know who called the function when we get there. + for (int i = n - 1; i >= 0; --i, --pcp) { + idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp)); + + if (idx >= 0) { + // Skip over bogus __restore_rt frames that realtime profiling + // can introduce. + if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) { + --pcp; + --i; + idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp)); + if (idx < 0) { + continue; + } + } + + // If we have not seen this symbol before count it and mark it as seen + if (flagArray[idx] != stacks && ((flagArray[idx] = stacks) || true)) { + ++countArray[idx]; + } + + // We know who we are and we know who our parrent is. Count this + if (parrentIdx >= 0) { + externalSymbols[parrentIdx]->regChild(idx); + externalSymbols[idx]->regParrent(parrentIdx); + } + // inside if() so an unknown in the middle of a stack won't break + // the link! + parrentIdx = idx; + } + } + + // idx should be the function that we were in when we received the signal. + if (idx >= 0) { + ++externalSymbols[idx]->timerHit; + } + } + } + if (!cleo) generateReportHTML(outputfd, countArray, stacks, thread); +} + +void FunctionCount::printReport(FILE* fp, leaky* lk, int parent, int total) { + const char* fmt = + " <A href=\"#%d\">%8d (%3.1f%%)%s %s</A>%s\n"; + + int nmax, tmax = ((~0U) >> 1); + + do { + nmax = 0; + for (int j = getSize(); --j >= 0;) { + int cnt = getCount(j); + if (cnt == tmax) { + int idx = getIndex(j); + char* symname = htmlify(lk->indexToName(idx)); + fprintf(fp, fmt, idx, getCount(j), getCount(j) * 100.0 / total, + getCount(j) * 100.0 / total >= 10.0 ? "" : " ", symname, + parent == idx ? " (self)" : ""); + delete[] symname; + } else if (cnt < tmax && cnt > nmax) { + nmax = cnt; + } + } + } while ((tmax = nmax) > 0); +} diff --git a/tools/jprof/leaky.h b/tools/jprof/leaky.h new file mode 100644 index 0000000000..6c9beb7b42 --- /dev/null +++ b/tools/jprof/leaky.h @@ -0,0 +1,124 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __leaky_h_ +#define __leaky_h_ + +#include "config.h" +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include "libmalloc.h" +#include "strset.h" +#include "intcnt.h" + +typedef unsigned int u_int; + +struct Symbol; +struct leaky; + +class FunctionCount : public IntCount { + public: + void printReport(FILE* fp, leaky* lk, int parent, int total); +}; + +struct Symbol { + char* name; + u_long address; + int timerHit; + FunctionCount cntP, cntC; + + int regChild(int id) { return cntC.countAdd(id, 1); } + int regParrent(int id) { return cntP.countAdd(id, 1); } + void regClear() { + cntC.clear(); + cntP.clear(); + } + + Symbol() : timerHit(0) {} + void Init(const char* aName, u_long aAddress) { + name = aName ? strdup(aName) : (char*)""; + address = aAddress; + } +}; + +struct LoadMapEntry { + char* name; // name of .so + u_long address; // base address where it was mapped in + LoadMapEntry* next; +}; + +struct leaky { + leaky(); + ~leaky(); + + void initialize(int argc, char** argv); + void open(char* arg); + + char* applicationName; + int logFileIndex; + int numLogFiles; + char* progFile; + FILE* outputfd; + + bool quiet; + bool showAddress; + bool showThreads; + bool cleo; + u_int stackDepth; + int onlyThread; + char* output_dir; + + int mappedLogFile; + malloc_log_entry* firstLogEntry; + malloc_log_entry* lastLogEntry; + + int stacks; + + int sfd; + Symbol** externalSymbols; + Symbol** lastSymbol; + int usefulSymbols; + int numExternalSymbols; + StrSet exclusions; + u_long lowestSymbolAddr; + u_long highestSymbolAddr; + + LoadMapEntry* loadMap; + + bool collect_last; + int collect_start; + int collect_end; + + StrSet roots; + StrSet includes; + + void usageError(); + + void LoadMap(); + + void analyze(int thread); + + void dumpEntryToLog(malloc_log_entry* lep); + + void insertAddress(u_long address, malloc_log_entry* lep); + void removeAddress(u_long address, malloc_log_entry* lep); + + void displayStackTrace(FILE* out, malloc_log_entry* lep); + + Symbol** ExtendSymbols(int num); + void ReadSymbols(const char* fileName, u_long aBaseAddress); + void ReadSharedLibrarySymbols(); + void setupSymbols(const char* fileName); + Symbol* findSymbol(u_long address); + bool excluded(malloc_log_entry* lep); + bool included(malloc_log_entry* lep); + const char* indexToName(int idx) { return externalSymbols[idx]->name; } + + private: + void generateReportHTML(FILE* fp, int* countArray, int count, int thread); + int findSymbolIndex(u_long address); +}; + +#endif /* __leaky_h_ */ diff --git a/tools/jprof/moz.build b/tools/jprof/moz.build new file mode 100644 index 0000000000..2aa4a8c294 --- /dev/null +++ b/tools/jprof/moz.build @@ -0,0 +1,28 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +DIRS += ["stub"] + +Program("jprof") + +SOURCES += [ + "bfd.cpp", + "coff.cpp", + "elf.cpp", + "intcnt.cpp", + "leaky.cpp", + "strset.cpp", +] + +LOCAL_INCLUDES += [ + "stub", +] + +OS_LIBS += [ + "dl", + "bfd", + "iberty", +] diff --git a/tools/jprof/split-profile.py b/tools/jprof/split-profile.py new file mode 100755 index 0000000000..c280c130c2 --- /dev/null +++ b/tools/jprof/split-profile.py @@ -0,0 +1,156 @@ +#!/usr/bin/python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This program splits up a jprof profile into multiple files based on a +# list of functions in a text file. First, a complete profile is +# generated. Then, for each line in the text file, a profile is +# generated containing only stacks that go through that line, and also +# excluding all stacks in earlier lines in the text file. This means +# that the text file, from start to end, is splitting out pieces of the +# profile in their own file. Finally, a final profile containing the +# remainder is produced. + +# The program takes four arguments: +# (1) The path to jprof. +# (2) The path to the text file describing the splits. The output +# will be placed in the same directory as this file. +# (3) The program that was profiled. +# (4) The jprof-log file generated by the profile, to be split up. +# (Really, all arguments from (3) and later are passed through to +# jprof, so additional arguments could be provided if you want to pass +# additional arguments to jprof.) + +# In slightly more detail: +# +# This script uses jprof's includes (-i) and excludes (-e) options to +# split profiles into segments. It takes as input a single text file, +# and from that text file creates a series of jprof profiles in the +# directory the text file is in. +# +# The input file format looks like the following: +# +# poll g_main_poll +# GetRuleCascade CSSRuleProcessor::GetRuleCascade(nsPresContext *, nsAtom *) +# RuleProcessorData RuleProcessorData::RuleProcessorData +# (nsPresContext *, nsIContent *, nsRuleWalker *, nsCompatibility *) +# +# +# From this input file, the script will construct a profile called +# jprof-0.html that contains the whole profile, a profile called +# jprof-1-poll.html that includes only stacks with g_main_poll, a +# profile called jprof-2-GetRuleCascade.html that includes only stacks +# that have GetRuleCascade and do not have g_main_poll, a profile called +# jprof-3-RuleProcessorData.html that includes only stacks that have the +# RuleProcessorData constructor and do not have GetRuleCascade or +# g_main_poll, and a profile called jprof-4.html that includes only +# stacks that do not have any of the three functions in them. +# +# This means that all of the segments of the profile, except +# jprof-0.html, are mutually exclusive. Thus clever ordering of the +# functions in the input file can lead to a logical splitting of the +# profile into segments. + +import os.path +import subprocess +import sys + +if len(sys.argv) < 5: + sys.stderr.write("Expected arguments: <jprof> <split-file> <program> <jprof-log>\n") + sys.exit(1) + +jprof = sys.argv[1] +splitfile = sys.argv[2] +passthrough = sys.argv[3:] + +for f in [jprof, splitfile]: + if not os.path.isfile(f): + sys.stderr.write("could not find file: {0}\n".format(f)) + sys.exit(1) + + +def read_splits(splitfile): + """ + Read splitfile (each line of which contains a name, a space, and + then a function name to split on), and return a list of pairs + representing exactly that. (Note that the name cannot contain + spaces, but the function name can, and often does.) + """ + + def line_to_split(line): + line = line.strip("\r\n") + idx = line.index(" ") + return (line[0:idx], line[idx + 1 :]) + + io = open(splitfile, "r") + result = [line_to_split(line) for line in io] + io.close() + return result + + +splits = read_splits(splitfile) + + +def generate_profile(options, destfile): + """ + Run jprof to generate one split of the profile. + """ + args = [jprof] + options + passthrough + print("Generating {}".format(destfile)) + destio = open(destfile, "w") + # jprof expects the "jprof-map" file to be in its current working directory + cwd = None + for option in passthrough: + if option.find("jprof-log"): + cwd = os.path.dirname(option) + if cwd is None: + raise Exception("no jprof-log option given") + process = subprocess.Popen(args, stdout=destio, cwd=cwd) + process.wait() + destio.close() + if process.returncode != 0: + os.remove(destfile) + sys.stderr.write( + "Error {0} from command:\n {1}\n".format( + process.returncode, " ".join(args) + ) + ) + sys.exit(process.returncode) + + +def output_filename(number, splitname): + """ + Return the filename (absolute path) we should use to output the + profile segment with the given number and splitname. Splitname + should be None for the complete profile and the remainder. + """ + + def pad_count(i): + result = str(i) + # 0-pad to the same length + result = "0" * (len(str(len(splits) + 1)) - len(result)) + result + return result + + name = pad_count(number) + if splitname is not None: + name += "-" + splitname + + return os.path.join(os.path.dirname(splitfile), "jprof-{0}.html".format(name)) + + +# generate the complete profile +generate_profile([], output_filename(0, None)) + +# generate the listed splits +count = 1 +excludes = [] +for (splitname, splitfunction) in splits: + generate_profile( + excludes + ["-i" + splitfunction], output_filename(count, splitname) + ) + excludes += ["-e" + splitfunction] + count = count + 1 + +# generate the remainder after the splits +generate_profile(excludes, output_filename(count, None)) diff --git a/tools/jprof/strset.cpp b/tools/jprof/strset.cpp new file mode 100644 index 0000000000..514b8c03e0 --- /dev/null +++ b/tools/jprof/strset.cpp @@ -0,0 +1,37 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "strset.h" +#include <malloc.h> +#include <string.h> + +StrSet::StrSet() { + strings = 0; + numstrings = 0; +} + +void StrSet::add(const char* s) { + if (strings) { + strings = (char**)realloc(strings, (numstrings + 1) * sizeof(char*)); + } else { + strings = (char**)malloc(sizeof(char*)); + } + strings[numstrings] = strdup(s); + numstrings++; +} + +int StrSet::contains(const char* s) { + char** sp = strings; + int i = numstrings; + + while (--i >= 0) { + char* ss = *sp++; + if (ss[0] == s[0]) { + if (strcmp(ss, s) == 0) { + return 1; + } + } + } + return 0; +} diff --git a/tools/jprof/strset.h b/tools/jprof/strset.h new file mode 100644 index 0000000000..681ed22a25 --- /dev/null +++ b/tools/jprof/strset.h @@ -0,0 +1,19 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __strset_h_ +#define __strset_h_ + +struct StrSet { + StrSet(); + + void add(const char* string); + int contains(const char* string); + bool IsEmpty() const { return 0 == numstrings; } + + char** strings; + int numstrings; +}; + +#endif /* __strset_h_ */ diff --git a/tools/jprof/stub/Makefile.in b/tools/jprof/stub/Makefile.in new file mode 100644 index 0000000000..8e6b6b8f8d --- /dev/null +++ b/tools/jprof/stub/Makefile.in @@ -0,0 +1,8 @@ +#! gmake +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# override optimization +MOZ_OPTIMIZE_FLAGS = -fno-omit-frame-pointer diff --git a/tools/jprof/stub/config.h b/tools/jprof/stub/config.h new file mode 100644 index 0000000000..6e5789452a --- /dev/null +++ b/tools/jprof/stub/config.h @@ -0,0 +1,18 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef config_h___ +#define config_h___ + +#define MAX_STACK_CRAWL 500 +#define M_LOGFILE "jprof-log" +#define M_MAPFILE "jprof-map" + +#if defined(linux) || defined(NTO) +# define USE_BFD +# undef NEED_WRAPPERS + +#endif /* linux */ + +#endif /* config_h___ */ diff --git a/tools/jprof/stub/jprof.h b/tools/jprof/stub/jprof.h new file mode 100644 index 0000000000..c118760750 --- /dev/null +++ b/tools/jprof/stub/jprof.h @@ -0,0 +1,17 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef jprof_h___ +#define jprof_h___ +#include "nscore.h" + +#ifdef _IMPL_JPPROF_API +# define JPROF_API(type) NS_EXPORT_(type) +#else +# define JPROF_API(type) NS_IMPORT_(type) +#endif + +JPROF_API(void) setupProfilingStuff(void); + +#endif /* jprof_h___ */ diff --git a/tools/jprof/stub/libmalloc.cpp b/tools/jprof/stub/libmalloc.cpp new file mode 100644 index 0000000000..3003543a93 --- /dev/null +++ b/tools/jprof/stub/libmalloc.cpp @@ -0,0 +1,740 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +// vim:cindent:sw=4:et:ts=8: +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// The linux glibc hides part of sigaction if _POSIX_SOURCE is defined +#if defined(linux) +# undef _POSIX_SOURCE +# undef _SVID_SOURCE +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif +#endif + +#include <errno.h> +#if defined(linux) +# include <linux/rtc.h> +# include <pthread.h> +#endif +#include <unistd.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <ucontext.h> +#include <execinfo.h> + +#include "libmalloc.h" +#include "jprof.h" +#include <string.h> +#include <errno.h> +#include <dlfcn.h> + +#ifdef NTO +# include <sys/link.h> +extern r_debug _r_debug; +#else +# include <link.h> +#endif + +#define USE_GLIBC_BACKTRACE 1 +// To debug, use #define JPROF_STATIC +#define JPROF_STATIC static + +static int gLogFD = -1; +static pthread_t main_thread; + +static bool gIsChild = false; +static int gFilenamePID; + +static void startSignalCounter(unsigned long millisec); +static int enableRTCSignals(bool enable); + +//---------------------------------------------------------------------- +// replace use of atexit() + +static void DumpAddressMap(); + +struct JprofShutdown { + JprofShutdown() {} + ~JprofShutdown() { DumpAddressMap(); } +}; + +static void RegisterJprofShutdown() { + // This instanciates the dummy class above, and will trigger the class + // destructor when libxul is unloaded. This is equivalent to atexit(), + // but gracefully handles dlclose(). + static JprofShutdown t; +} + +#if defined(i386) || defined(_i386) || defined(__x86_64__) +JPROF_STATIC void CrawlStack(malloc_log_entry* me, void* stack_top, + void* top_instr_ptr) { +# if USE_GLIBC_BACKTRACE + // This probably works on more than x86! But we need a way to get the + // top instruction pointer, which is kindof arch-specific + void* array[500]; + int cnt, i; + u_long numpcs = 0; + + // This is from glibc. A more generic version might use + // libunwind and/or CaptureStackBackTrace() on Windows + cnt = backtrace(&array[0], sizeof(array) / sizeof(array[0])); + + // StackHook->JprofLog->CrawlStack + // Then we have sigaction, which replaced top_instr_ptr + array[3] = top_instr_ptr; + for (i = 3; i < cnt; i++) { + me->pcs[numpcs++] = (char*)array[i]; + } + me->numpcs = numpcs; + +# else + // original code - this breaks on many platforms + void** bp; +# if defined(__i386) + __asm__("movl %%ebp, %0" : "=g"(bp)); +# elif defined(__x86_64__) + __asm__("movq %%rbp, %0" : "=g"(bp)); +# else + // It would be nice if this worked uniformly, but at least on i386 and + // x86_64, it stopped working with gcc 4.1, because it points to the + // end of the saved registers instead of the start. + bp = __builtin_frame_address(0); +# endif + u_long numpcs = 0; + bool tracing = false; + + me->pcs[numpcs++] = (char*)top_instr_ptr; + + while (numpcs < MAX_STACK_CRAWL) { + void** nextbp = (void**)*bp++; + void* pc = *bp; + if (nextbp < bp) { + break; + } + if (tracing) { + // Skip the signal handling. + me->pcs[numpcs++] = (char*)pc; + } else if (pc == top_instr_ptr) { + tracing = true; + } + bp = nextbp; + } + me->numpcs = numpcs; +# endif +} +#endif + +//---------------------------------------------------------------------- + +static int rtcHz; +static int rtcFD = -1; +static bool circular = false; + +#if defined(linux) || defined(NTO) +static void DumpAddressMap() { + // Turn off the timer so we don't get interrupts during shutdown +# if defined(linux) + if (rtcHz) { + enableRTCSignals(false); + } else +# endif + { + startSignalCounter(0); + } + + char filename[2048]; + if (gIsChild) + snprintf(filename, sizeof(filename), "%s-%d", M_MAPFILE, gFilenamePID); + else + snprintf(filename, sizeof(filename), "%s", M_MAPFILE); + + int mfd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (mfd >= 0) { + malloc_map_entry mme; + link_map* map = _r_debug.r_map; + while (nullptr != map) { + if (map->l_name && *map->l_name) { + mme.nameLen = strlen(map->l_name); + mme.address = map->l_addr; + write(mfd, &mme, sizeof(mme)); + write(mfd, map->l_name, mme.nameLen); +# if 0 + write(1, map->l_name, mme.nameLen); + write(1, "\n", 1); +# endif + } + map = map->l_next; + } + close(mfd); + } +} +#endif + +static bool was_paused = true; + +JPROF_STATIC void JprofBufferDump(); +JPROF_STATIC void JprofBufferClear(); + +static void ClearProfilingHook(int signum) { + if (circular) { + JprofBufferClear(); + puts("Jprof: cleared circular buffer."); + } +} + +static void EndProfilingHook(int signum) { + if (circular) JprofBufferDump(); + + DumpAddressMap(); + was_paused = true; + puts("Jprof: profiling paused."); +} + +//---------------------------------------------------------------------- +// proper usage would be a template, including the function to find the +// size of an entry, or include a size header explicitly to each entry. +#if defined(linux) +# define DUMB_LOCK() pthread_mutex_lock(&mutex); +# define DUMB_UNLOCK() pthread_mutex_unlock(&mutex); +#else +# define DUMB_LOCK() FIXME() +# define DUMB_UNLOCK() FIXME() +#endif + +class DumbCircularBuffer { + public: + DumbCircularBuffer(size_t init_buffer_size) { + used = 0; + buffer_size = init_buffer_size; + buffer = (unsigned char*)malloc(buffer_size); + head = tail = buffer; + +#if defined(linux) + pthread_mutexattr_t mAttr; + pthread_mutexattr_settype(&mAttr, PTHREAD_MUTEX_RECURSIVE_NP); + pthread_mutex_init(&mutex, &mAttr); + pthread_mutexattr_destroy(&mAttr); +#endif + } + ~DumbCircularBuffer() { + free(buffer); +#if defined(linux) + pthread_mutex_destroy(&mutex); +#endif + } + + void clear() { + DUMB_LOCK(); + head = tail; + used = 0; + DUMB_UNLOCK(); + } + + bool empty() { return head == tail; } + + size_t space_available() { + size_t result; + DUMB_LOCK(); + if (tail > head) + result = buffer_size - (tail - head) - 1; + else + result = head - tail - 1; + DUMB_UNLOCK(); + return result; + } + + void drop(size_t size) { + // assumes correctness! + DUMB_LOCK(); + head += size; + if (head >= &buffer[buffer_size]) head -= buffer_size; + used--; + DUMB_UNLOCK(); + } + + bool insert(void* data, size_t size) { + // can fail if not enough space in the entire buffer + DUMB_LOCK(); + if (space_available() < size) return false; + + size_t max_without_wrap = &buffer[buffer_size] - tail; + size_t initial = size > max_without_wrap ? max_without_wrap : size; +#if DEBUG_CIRCULAR + fprintf(stderr, "insert(%d): max_without_wrap %d, size %d, initial %d\n", + used, max_without_wrap, size, initial); +#endif + memcpy(tail, data, initial); + tail += initial; + data = ((char*)data) + initial; + size -= initial; + if (size != 0) { +#if DEBUG_CIRCULAR + fprintf(stderr, "wrapping by %d bytes\n", size); +#endif + memcpy(buffer, data, size); + tail = &(((unsigned char*)buffer)[size]); + } + + used++; + DUMB_UNLOCK(); + + return true; + } + + // for external access to the buffer (saving) + void lock() { DUMB_LOCK(); } + + void unlock() { DUMB_UNLOCK(); } + + // XXX These really shouldn't be public... + unsigned char* head; + unsigned char* tail; + unsigned int used; + unsigned char* buffer; + size_t buffer_size; + + private: + pthread_mutex_t mutex; +}; + +class DumbCircularBuffer* JprofBuffer; + +JPROF_STATIC void JprofBufferInit(size_t size) { + JprofBuffer = new DumbCircularBuffer(size); +} + +JPROF_STATIC void JprofBufferClear() { + fprintf(stderr, "Told to clear JPROF circular buffer\n"); + JprofBuffer->clear(); +} + +JPROF_STATIC size_t JprofEntrySizeof(malloc_log_entry* me) { + return offsetof(malloc_log_entry, pcs) + me->numpcs * sizeof(char*); +} + +JPROF_STATIC void JprofBufferAppend(malloc_log_entry* me) { + size_t size = JprofEntrySizeof(me); + + do { + while (JprofBuffer->space_available() < size && JprofBuffer->used > 0) { +#if DEBUG_CIRCULAR + fprintf( + stderr, + "dropping entry: %d in use, %d free, need %d, size_to_free = %d\n", + JprofBuffer->used, JprofBuffer->space_available(), size, + JprofEntrySizeof((malloc_log_entry*)JprofBuffer->head)); +#endif + JprofBuffer->drop(JprofEntrySizeof((malloc_log_entry*)JprofBuffer->head)); + } + if (JprofBuffer->space_available() < size) return; + + } while (!JprofBuffer->insert(me, size)); +} + +JPROF_STATIC void JprofBufferDump() { + JprofBuffer->lock(); +#if DEBUG_CIRCULAR + fprintf( + stderr, "dumping JP_CIRCULAR buffer, %d of %d bytes\n", + JprofBuffer->tail > JprofBuffer->head + ? JprofBuffer->tail - JprofBuffer->head + : JprofBuffer->buffer_size + JprofBuffer->tail - JprofBuffer->head, + JprofBuffer->buffer_size); +#endif + if (JprofBuffer->tail >= JprofBuffer->head) { + write(gLogFD, JprofBuffer->head, JprofBuffer->tail - JprofBuffer->head); + } else { + write(gLogFD, JprofBuffer->head, + &(JprofBuffer->buffer[JprofBuffer->buffer_size]) - JprofBuffer->head); + write(gLogFD, JprofBuffer->buffer, JprofBuffer->tail - JprofBuffer->buffer); + } + JprofBuffer->clear(); + JprofBuffer->unlock(); +} + +//---------------------------------------------------------------------- + +JPROF_STATIC void JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr) { + // Static is simply to make debugging tolerable + static malloc_log_entry me; + + me.delTime = aTime; + me.thread = syscall(SYS_gettid); // gettid(); + if (was_paused) { + me.flags = JP_FIRST_AFTER_PAUSE; + was_paused = 0; + } else { + me.flags = 0; + } + + CrawlStack(&me, stack_top, top_instr_ptr); + +#ifndef NTO + if (circular) { + JprofBufferAppend(&me); + } else { + write(gLogFD, &me, JprofEntrySizeof(&me)); + } +#else + printf("Neutrino is missing the pcs member of malloc_log_entry!! \n"); +#endif +} + +static int realTime; + +/* Lets interrupt at 10 Hz. This is so my log files don't get too large. + * This can be changed to a faster value latter. This timer is not + * programmed to reset, even though it is capable of doing so. This is + * to keep from getting interrupts from inside of the handler. + */ +static void startSignalCounter(unsigned long millisec) { + struct itimerval tvalue; + + tvalue.it_interval.tv_sec = 0; + tvalue.it_interval.tv_usec = 0; + tvalue.it_value.tv_sec = millisec / 1000; + tvalue.it_value.tv_usec = (millisec % 1000) * 1000; + + if (realTime) { + setitimer(ITIMER_REAL, &tvalue, nullptr); + } else { + setitimer(ITIMER_PROF, &tvalue, nullptr); + } +} + +static long timerMilliSec = 50; + +#if defined(linux) +static int setupRTCSignals(int hz, struct sigaction* sap) { + /* global */ rtcFD = open("/dev/rtc", O_RDONLY); + if (rtcFD < 0) { + perror("JPROF_RTC setup: open(\"/dev/rtc\", O_RDONLY)"); + return 0; + } + + if (sigaction(SIGIO, sap, nullptr) == -1) { + perror("JPROF_RTC setup: sigaction(SIGIO)"); + return 0; + } + + if (ioctl(rtcFD, RTC_IRQP_SET, hz) == -1) { + perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_IRQP_SET, $JPROF_RTC_HZ)"); + return 0; + } + + if (ioctl(rtcFD, RTC_PIE_ON, 0) == -1) { + perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_PIE_ON)"); + return 0; + } + + if (fcntl(rtcFD, F_SETSIG, 0) == -1) { + perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETSIG, 0)"); + return 0; + } + + if (fcntl(rtcFD, F_SETOWN, getpid()) == -1) { + perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETOWN, getpid())"); + return 0; + } + + return 1; +} + +static int enableRTCSignals(bool enable) { + static bool enabled = false; + if (enabled == enable) { + return 0; + } + enabled = enable; + + int flags = fcntl(rtcFD, F_GETFL); + if (flags < 0) { + perror("JPROF_RTC setup: fcntl(/dev/rtc, F_GETFL)"); + return 0; + } + + if (enable) { + flags |= FASYNC; + } else { + flags &= ~FASYNC; + } + + if (fcntl(rtcFD, F_SETFL, flags) == -1) { + if (enable) { + perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags | FASYNC)"); + } else { + perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags & ~FASYNC)"); + } + return 0; + } + + return 1; +} +#endif + +JPROF_STATIC void StackHook(int signum, siginfo_t* info, void* ucontext) { + static struct timeval tFirst; + static int first = 1; + size_t millisec = 0; + +#if defined(linux) + if (rtcHz && pthread_self() != main_thread) { + // Only collect stack data on the main thread, for now. + return; + } +#endif + + if (first && !(first = 0)) { + puts("Jprof: received first signal"); +#if defined(linux) + if (rtcHz) { + enableRTCSignals(true); + } else +#endif + { + gettimeofday(&tFirst, 0); + millisec = 0; + } + } else { +#if defined(linux) + if (rtcHz) { + enableRTCSignals(true); + } else +#endif + { + struct timeval tNow; + gettimeofday(&tNow, 0); + double usec = 1e6 * (tNow.tv_sec - tFirst.tv_sec); + usec += (tNow.tv_usec - tFirst.tv_usec); + millisec = static_cast<size_t>(usec * 1e-3); + } + } + + gregset_t& gregs = ((ucontext_t*)ucontext)->uc_mcontext.gregs; +#ifdef __x86_64__ + JprofLog(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]); +#else + JprofLog(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]); +#endif + + if (!rtcHz) startSignalCounter(timerMilliSec); +} + +NS_EXPORT_(void) setupProfilingStuff(void) { + static int gFirstTime = 1; + char filename[2048]; // XXX fix + + if (gFirstTime && !(gFirstTime = 0)) { + int startTimer = 1; + int doNotStart = 1; + int firstDelay = 0; + int append = O_TRUNC; + char* tst = getenv("JPROF_FLAGS"); + + /* Options from JPROF_FLAGS environment variable: + * JP_DEFER -> Wait for a SIGPROF (or SIGALRM, if JP_REALTIME + * is set) from userland before starting + * to generate them internally + * JP_START -> Install the signal handler + * JP_PERIOD -> Time between profiler ticks + * JP_FIRST -> Extra delay before starting + * JP_REALTIME -> Take stack traces in intervals of real time + * rather than time used by the process (and the + * system for the process). This is useful for + * finding time spent by the X server. + * JP_APPEND -> Append to jprof-log rather than overwriting it. + * This is somewhat risky since it depends on the + * address map staying constant across multiple runs. + * JP_FILENAME -> base filename to use when saving logs. Note that + * this does not affect the mapfile. + * JP_CIRCULAR -> use a circular buffer of size N, write/clear on SIGUSR1 + * + * JPROF_ISCHILD is set if this is not the first process. + */ + + circular = false; + + if (tst) { + if (strstr(tst, "JP_DEFER")) { + doNotStart = 0; + startTimer = 0; + } + if (strstr(tst, "JP_START")) doNotStart = 0; + if (strstr(tst, "JP_REALTIME")) realTime = 1; + if (strstr(tst, "JP_APPEND")) append = O_APPEND; + + char* delay = strstr(tst, "JP_PERIOD="); + if (delay) { + double tmp = strtod(delay + strlen("JP_PERIOD="), nullptr); + if (tmp >= 1e-3) { + timerMilliSec = static_cast<unsigned long>(1000 * tmp); + } else { + fprintf(stderr, "JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n", + tmp); + timerMilliSec = 1; + } + } + + char* circular_op = strstr(tst, "JP_CIRCULAR="); + if (circular_op) { + size_t size = atol(circular_op + strlen("JP_CIRCULAR=")); + if (size < 1000) { + fprintf(stderr, "JP_CIRCULAR of %lu less than 1000, using 10000\n", + (unsigned long)size); + size = 10000; + } + JprofBufferInit(size); + fprintf(stderr, "JP_CIRCULAR buffer of %lu bytes\n", + (unsigned long)size); + circular = true; + } + + char* first = strstr(tst, "JP_FIRST="); + if (first) { + firstDelay = atol(first + strlen("JP_FIRST=")); + } + + char* rtc = strstr(tst, "JP_RTC_HZ="); + if (rtc) { +#if defined(linux) + rtcHz = atol(rtc + strlen("JP_RTC_HZ=")); + timerMilliSec = 0; /* This makes JP_FIRST work right. */ + realTime = 1; /* It's the _R_TC and all. ;) */ + +# define IS_POWER_OF_TWO(x) (((x) & ((x)-1)) == 0) + + if (!IS_POWER_OF_TWO(rtcHz) || rtcHz < 2) { + fprintf(stderr, + "JP_RTC_HZ must be power of two and >= 2, " + "but %d was provided; using default of 2048\n", + rtcHz); + rtcHz = 2048; + } +#else + fputs( + "JP_RTC_HZ found, but RTC profiling only supported on " + "Linux!\n", + stderr); + +#endif + } + const char* f = strstr(tst, "JP_FILENAME="); + if (f) + f = f + strlen("JP_FILENAME="); + else + f = M_LOGFILE; + + char* is_child = getenv("JPROF_ISCHILD"); + if (!is_child) setenv("JPROF_ISCHILD", "", 0); + gIsChild = !!is_child; + + gFilenamePID = syscall(SYS_gettid); // gettid(); + if (is_child) + snprintf(filename, sizeof(filename), "%s-%d", f, gFilenamePID); + else + snprintf(filename, sizeof(filename), "%s", f); + + // XXX FIX! inherit current capture state! + } + + if (!doNotStart) { + if (gLogFD < 0) { + gLogFD = open(filename, O_CREAT | O_WRONLY | append, 0666); + if (gLogFD < 0) { + fprintf(stderr, "Unable to create " M_LOGFILE); + perror(":"); + } else { + struct sigaction action; + sigset_t mset; + + // Dump out the address map when we terminate + RegisterJprofShutdown(); + + main_thread = pthread_self(); + // fprintf(stderr,"jprof: main_thread = %u\n", + // (unsigned int)main_thread); + + // FIX! probably should block these against each other + // Very unlikely. + sigemptyset(&mset); + action.sa_handler = nullptr; + action.sa_sigaction = StackHook; + action.sa_mask = mset; + action.sa_flags = SA_RESTART | SA_SIGINFO; +#if defined(linux) + if (rtcHz) { + if (!setupRTCSignals(rtcHz, &action)) { + fputs( + "jprof: Error initializing RTC, NOT " + "profiling\n", + stderr); + return; + } + } + + if (!rtcHz || firstDelay != 0) +#endif + { + if (realTime) { + sigaction(SIGALRM, &action, nullptr); + } + } + // enable PROF in all cases to simplify JP_DEFER/pause/restart + sigaction(SIGPROF, &action, nullptr); + + // make it so a SIGUSR1 will stop the profiling + // Note: It currently does not close the logfile. + // This could be configurable (so that it could + // later be reopened). + + struct sigaction stop_action; + stop_action.sa_handler = EndProfilingHook; + stop_action.sa_mask = mset; + stop_action.sa_flags = SA_RESTART; + sigaction(SIGUSR1, &stop_action, nullptr); + + // make it so a SIGUSR2 will clear the circular buffer + + stop_action.sa_handler = ClearProfilingHook; + stop_action.sa_mask = mset; + stop_action.sa_flags = SA_RESTART; + sigaction(SIGUSR2, &stop_action, nullptr); + + printf( + "Jprof: Initialized signal handler and set " + "timer for %lu %s, %d s " + "initial delay\n", + rtcHz ? rtcHz : timerMilliSec, rtcHz ? "Hz" : "ms", firstDelay); + + if (startTimer) { +#if defined(linux) + /* If we have an initial delay we can just use + startSignalCounter to set up a timer to fire the + first stackHook after that delay. When that happens + we'll go and switch to RTC profiling. */ + if (rtcHz && firstDelay == 0) { + puts("Jprof: enabled RTC signals"); + enableRTCSignals(true); + } else +#endif + { + puts("Jprof: started timer"); + startSignalCounter(firstDelay * 1000 + timerMilliSec); + } + } + } + } + } + } else { + printf("setupProfilingStuff() called multiple times\n"); + } +} diff --git a/tools/jprof/stub/libmalloc.h b/tools/jprof/stub/libmalloc.h new file mode 100644 index 0000000000..a78b35ade8 --- /dev/null +++ b/tools/jprof/stub/libmalloc.h @@ -0,0 +1,45 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef libmalloc_h___ +#define libmalloc_h___ + +#include <sys/types.h> +#include <malloc.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#include "config.h" + +typedef unsigned long u_long; + +// For me->flags +#define JP_FIRST_AFTER_PAUSE 1 + +// Format of a jprof log entry. This is what's written out to the +// "jprof-log" file. +// It's called malloc_log_entry because the history of jprof is that +// it's a modified version of tracemalloc. +struct malloc_log_entry { + u_long delTime; + u_long numpcs; + unsigned int flags; + int thread; + char* pcs[MAX_STACK_CRAWL]; +}; + +// Format of a malloc map entry; after this struct is nameLen+1 bytes of +// name data. +struct malloc_map_entry { + u_long nameLen; + u_long address; // base address +}; + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* libmalloc_h___ */ diff --git a/tools/jprof/stub/moz.build b/tools/jprof/stub/moz.build new file mode 100644 index 0000000000..692c6ea37f --- /dev/null +++ b/tools/jprof/stub/moz.build @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "jprof.h", +] + +SOURCES += [ + "libmalloc.cpp", +] + +SharedLibrary("jprof") + +DEFINES["_IMPL_JPROF_API"] = True |