/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "leaky.h" #include "intcnt.h" #include #include #include #include #include #include #ifndef NTO # include #endif #include #include #include #ifdef NTO # include #endif #ifndef FALSE # define FALSE 0 #endif #ifndef TRUE # define TRUE 1 #endif static const u_int DefaultBuckets = 10007; // arbitrary, but prime static const u_int MaxBuckets = 1000003; // arbitrary, but prime //---------------------------------------------------------------------- int main(int argc, char** argv) { leaky* l = new leaky; l->initialize(argc, argv); l->outputfd = stdout; for (int i = 0; i < l->numLogFiles; i++) { if (l->output_dir || l->numLogFiles > 1) { char name[2048]; // XXX fix if (l->output_dir) snprintf(name, sizeof(name), "%s/%s.html", l->output_dir, argv[l->logFileIndex + i]); else snprintf(name, sizeof(name), "%s.html", argv[l->logFileIndex + i]); fprintf(stderr, "opening %s\n", name); l->outputfd = fopen(name, "w"); // if an error we won't process the file } if (l->outputfd) { // paranoia l->open(argv[l->logFileIndex + i]); if (l->outputfd != stderr) { fclose(l->outputfd); l->outputfd = nullptr; } } } return 0; } char* htmlify(const char* in) { const char* p = in; char *out, *q; int n = 0; size_t newlen; // Count the number of '<' and '>' in the input. while ((p = strpbrk(p, "<>"))) { ++n; ++p; } // Knowing the number of '<' and '>', we can calculate the space // needed for the output string. newlen = strlen(in) + n * 3 + 1; out = new char[newlen]; // Copy the input to the output, with substitutions. p = in; q = out; do { if (*p == '<') { strcpy(q, "<"); q += 4; } else if (*p == '>') { strcpy(q, ">"); q += 4; } else { *q++ = *p; } p++; } while (*p); *q = '\0'; return out; } leaky::leaky() { applicationName = nullptr; progFile = nullptr; quiet = true; showAddress = false; showThreads = false; stackDepth = 100000; onlyThread = 0; cleo = false; mappedLogFile = -1; firstLogEntry = lastLogEntry = 0; sfd = -1; externalSymbols = 0; usefulSymbols = 0; numExternalSymbols = 0; lowestSymbolAddr = 0; highestSymbolAddr = 0; loadMap = nullptr; collect_last = false; collect_start = -1; collect_end = -1; } leaky::~leaky() {} void leaky::usageError() { fprintf(stderr, "Usage: %s [-v] [-t] [-e exclude] [-i include] [-s stackdepth] " "[--last] [--all] [--start n [--end m]] [--cleo] [--output-dir dir] " "prog log [log2 ...]\n", (char*)applicationName); fprintf( stderr, "\t-v: verbose\n" "\t-t | --threads: split threads\n" "\t--only-thread n: only profile thread N\n" "\t-i include-id: stack must include specified id\n" "\t-e exclude-id: stack must NOT include specified id\n" "\t-s stackdepth: Limit depth looked at from captured stack frames\n" "\t--last: only profile the last capture section\n" "\t--start n [--end m]: profile n to m (or end) capture sections\n" "\t--cleo: format output for 'cleopatra' display\n" "\t--output-dir dir: write output files to dir\n" "\tIf there's one log, output goes to stdout unless --output-dir is set\n" "\tIf there are more than one log, output files will be named with .html " "added\n"); exit(-1); } static struct option longopts[] = { {"threads", 0, nullptr, 't'}, {"only-thread", 1, nullptr, 'T'}, {"last", 0, nullptr, 'l'}, {"start", 1, nullptr, 'x'}, {"end", 1, nullptr, 'n'}, {"cleo", 0, nullptr, 'c'}, {"output-dir", 1, nullptr, 'd'}, {nullptr, 0, nullptr, 0}, }; void leaky::initialize(int argc, char** argv) { applicationName = argv[0]; applicationName = strrchr(applicationName, '/'); if (!applicationName) { applicationName = argv[0]; } else { applicationName++; } int arg; int errflg = 0; int longindex = 0; onlyThread = 0; output_dir = nullptr; cleo = false; // XXX tons of cruft here left over from tracemalloc // XXX The -- options shouldn't need short versions, or they should be // documented while (((arg = getopt_long(argc, argv, "adEe:gh:i:r:Rs:tT:qvx:ln:", longopts, &longindex)) != -1)) { switch (arg) { case '?': default: fprintf(stderr, "error: unknown option %c\n", optopt); errflg++; break; case 'a': break; case 'A': // not implemented showAddress = true; break; case 'c': cleo = true; break; case 'd': output_dir = optarg; // reference to an argv pointer break; case 'R': break; case 'e': exclusions.add(optarg); break; case 'g': break; case 'r': // not implemented roots.add(optarg); if (!includes.IsEmpty()) { errflg++; } break; case 'i': includes.add(optarg); if (!roots.IsEmpty()) { errflg++; } break; case 'h': break; case 's': stackDepth = atoi(optarg); if (stackDepth < 2) { stackDepth = 2; } break; case 'x': // --start collect_start = atoi(optarg); break; case 'n': // --end collect_end = atoi(optarg); break; case 'l': // --last collect_last = true; break; case 'q': break; case 'v': quiet = !quiet; break; case 't': showThreads = true; break; case 'T': showThreads = true; onlyThread = atoi(optarg); break; } } if (errflg || ((argc - optind) < 2)) { usageError(); } progFile = argv[optind++]; logFileIndex = optind; numLogFiles = argc - optind; if (!quiet) fprintf(stderr, "numlogfiles = %d\n", numLogFiles); } static void* mapFile(int fd, u_int flags, off_t* sz) { struct stat sb; if (fstat(fd, &sb) < 0) { perror("fstat"); exit(-1); } void* base = mmap(0, (int)sb.st_size, flags, MAP_PRIVATE, fd, 0); if (!base) { perror("mmap"); exit(-1); } *sz = sb.st_size; return base; } void leaky::LoadMap() { malloc_map_entry mme; char name[1000]; if (!loadMap) { // all files use the same map int fd = ::open(M_MAPFILE, O_RDONLY); if (fd < 0) { perror("open: " M_MAPFILE); exit(-1); } for (;;) { int nb = read(fd, &mme, sizeof(mme)); if (nb != sizeof(mme)) break; nb = read(fd, name, mme.nameLen); if (nb != (int)mme.nameLen) break; name[mme.nameLen] = 0; if (!quiet) { fprintf(stderr, "%s @ %lx\n", name, mme.address); } LoadMapEntry* lme = new LoadMapEntry; lme->address = mme.address; lme->name = strdup(name); lme->next = loadMap; loadMap = lme; } close(fd); } } void leaky::open(char* logFile) { int threadArray[100]; // should auto-expand int last_thread = -1; int numThreads = 0; int section = -1; bool collecting = false; LoadMap(); setupSymbols(progFile); // open up the log file if (mappedLogFile) ::close(mappedLogFile); mappedLogFile = ::open(logFile, O_RDONLY); if (mappedLogFile < 0) { perror("open"); exit(-1); } off_t size; firstLogEntry = (malloc_log_entry*)mapFile(mappedLogFile, PROT_READ, &size); lastLogEntry = (malloc_log_entry*)((char*)firstLogEntry + size); if (!collect_last || collect_start < 0) { collecting = true; } // First, restrict it to the capture sections specified (all, last, start/end) // This loop walks through all the call stacks we recorded for (malloc_log_entry* lep = firstLogEntry; lep < lastLogEntry; lep = reinterpret_cast(&lep->pcs[lep->numpcs])) { if (lep->flags & JP_FIRST_AFTER_PAUSE) { section++; if (collect_last) { firstLogEntry = lep; numThreads = 0; collecting = true; } if (collect_start == section) { collecting = true; firstLogEntry = lep; } if (collect_end == section) { collecting = false; lastLogEntry = lep; } if (!quiet) fprintf(stderr, "New section %d: first=%p, last=%p, collecting=%d\n", section, (void*)firstLogEntry, (void*)lastLogEntry, collecting); } // Capture thread info at the same time // Find all the threads captured // pthread/linux docs say the signal can be delivered to any thread in // the process. In practice, it appears in Linux that it's always // delivered to the thread that called setitimer(), and each thread can // have a separate itimer. There's a support library for gprof that // overlays pthread_create() to set timers in any threads you spawn. if (showThreads && collecting) { if (lep->thread != last_thread) { int i; for (i = 0; i < numThreads; i++) { if (lep->thread == threadArray[i]) break; } if (i == numThreads && i < (int)(sizeof(threadArray) / sizeof(threadArray[0]))) { threadArray[i] = lep->thread; numThreads++; if (!quiet) fprintf(stderr, "new thread %d\n", lep->thread); } } } } if (!quiet) fprintf(stderr, "Done collecting: sections %d: first=%p, last=%p, numThreads=%d\n", section, (void*)firstLogEntry, (void*)lastLogEntry, numThreads); if (!cleo) { fprintf(outputfd, "Jprof Profile Report\n"); fprintf(outputfd, "

Jprof Profile Report

\n"); } if (showThreads) { fprintf(stderr, "Num threads %d\n", numThreads); if (!cleo) { fprintf(outputfd, "
Threads:

\n");
      for (int i = 0; i < numThreads; i++) {
        fprintf(outputfd, "   %d  ", threadArray[i],
                threadArray[i]);
        if ((i + 1) % 10 == 0) fprintf(outputfd, "
\n"); } fprintf(outputfd, "
"); } for (int i = 0; i < numThreads; i++) { if (!onlyThread || onlyThread == threadArray[i]) analyze(threadArray[i]); } } else { analyze(0); } if (!cleo) fprintf(outputfd, "\n"); } //---------------------------------------------------------------------- static int symbolOrder(void const* a, void const* b) { Symbol const** ap = (Symbol const**)a; Symbol const** bp = (Symbol const**)b; return (*ap)->address == (*bp)->address ? 0 : ((*ap)->address > (*bp)->address ? 1 : -1); } void leaky::ReadSharedLibrarySymbols() { LoadMapEntry* lme = loadMap; while (nullptr != lme) { ReadSymbols(lme->name, lme->address); lme = lme->next; } } void leaky::setupSymbols(const char* fileName) { if (usefulSymbols == 0) { // only read once! // Read in symbols from the program ReadSymbols(fileName, 0); // Read in symbols from the .so's ReadSharedLibrarySymbols(); if (!quiet) { fprintf(stderr, "A total of %d symbols were loaded\n", usefulSymbols); } // Now sort them qsort(externalSymbols, usefulSymbols, sizeof(Symbol*), symbolOrder); lowestSymbolAddr = externalSymbols[0]->address; highestSymbolAddr = externalSymbols[usefulSymbols - 1]->address; } } // Binary search the table, looking for a symbol that covers this // address. int leaky::findSymbolIndex(u_long addr) { u_int base = 0; u_int limit = usefulSymbols - 1; Symbol** end = &externalSymbols[limit]; while (base <= limit) { u_int midPoint = (base + limit) >> 1; Symbol** sp = &externalSymbols[midPoint]; if (addr < (*sp)->address) { if (midPoint == 0) { return -1; } limit = midPoint - 1; } else { if (sp + 1 < end) { if (addr < (*(sp + 1))->address) { return midPoint; } } else { return midPoint; } base = midPoint + 1; } } return -1; } Symbol* leaky::findSymbol(u_long addr) { int idx = findSymbolIndex(addr); if (idx < 0) { return nullptr; } else { return externalSymbols[idx]; } } //---------------------------------------------------------------------- bool leaky::excluded(malloc_log_entry* lep) { if (exclusions.IsEmpty()) { return false; } char** pcp = &lep->pcs[0]; u_int n = lep->numpcs; for (u_int i = 0; i < n; i++, pcp++) { Symbol* sp = findSymbol((u_long)*pcp); if (sp && exclusions.contains(sp->name)) { return true; } } return false; } bool leaky::included(malloc_log_entry* lep) { if (includes.IsEmpty()) { return true; } char** pcp = &lep->pcs[0]; u_int n = lep->numpcs; for (u_int i = 0; i < n; i++, pcp++) { Symbol* sp = findSymbol((u_long)*pcp); if (sp && includes.contains(sp->name)) { return true; } } return false; } //---------------------------------------------------------------------- void leaky::displayStackTrace(FILE* out, malloc_log_entry* lep) { char** pcp = &lep->pcs[0]; u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth; for (u_int i = 0; i < n; i++, pcp++) { u_long addr = (u_long)*pcp; Symbol* sp = findSymbol(addr); if (sp) { fputs(sp->name, out); if (showAddress) { fprintf(out, "[%p]", (char*)addr); } } else { fprintf(out, "<%p>", (char*)addr); } fputc(' ', out); } fputc('\n', out); } void leaky::dumpEntryToLog(malloc_log_entry* lep) { printf("%ld\t", lep->delTime); printf(" --> "); displayStackTrace(outputfd, lep); } void leaky::generateReportHTML(FILE* fp, int* countArray, int count, int thread) { fprintf(fp, "
"); if (showThreads) { fprintf(fp, "
Thread: %d

", thread, thread); } fprintf( fp, "flat | hierarchical", thread, thread); fprintf(fp, "

\n"); int totalTimerHits = count; int* rankingTable = new int[usefulSymbols]; for (int cnt = usefulSymbols; --cnt >= 0; rankingTable[cnt] = cnt) ; // Drat. I would use ::qsort() but I would need a global variable and my // intro-pascal professor threatened to flunk anyone who used globals. // She damaged me for life :-) (That was 1986. See how much influence // she had. I don't remember her name but I always feel guilty about globals) // Shell Sort. 581130733 is the max 31 bit value of h = 3h+1 int mx, i, h; for (mx = usefulSymbols / 9, h = 581130733; h > 0; h /= 3) { if (h < mx) { for (i = h - 1; i < usefulSymbols; i++) { int j, tmp = rankingTable[i], val = countArray[tmp]; for (j = i; (j >= h) && (countArray[rankingTable[j - h]] < val); j -= h) { rankingTable[j] = rankingTable[j - h]; } rankingTable[j] = tmp; } } } // Ok, We are sorted now. Let's go through the table until we get to // functions that were never called. Right now we don't do much inside // this loop. Later we can get callers and callees into it like gprof // does fprintf(fp, "

Hierarchical Profile


\n", thread); fprintf(fp, "
\n");
  fprintf(fp, "%6s %6s         %4s      %s\n", "index", "Count", "Hits",
          "Function Name");

  for (i = 0; i < usefulSymbols && countArray[rankingTable[i]] > 0; i++) {
    Symbol** sp = &externalSymbols[rankingTable[i]];

    (*sp)->cntP.printReport(fp, this, rankingTable[i], totalTimerHits);

    char* symname = htmlify((*sp)->name);
    fprintf(fp,
            "%6d %6d (%3.1f%%)%s %8d (%3.1f%%)%s %s\n",
            rankingTable[i], (*sp)->timerHit,
            ((*sp)->timerHit * 1000 / totalTimerHits) / 10.0,
            ((*sp)->timerHit * 1000 / totalTimerHits) / 10.0 >= 10.0 ? "" : " ",
            rankingTable[i], countArray[rankingTable[i]],
            (countArray[rankingTable[i]] * 1000 / totalTimerHits) / 10.0,
            (countArray[rankingTable[i]] * 1000 / totalTimerHits) / 10.0 >= 10.0
                ? ""
                : " ",
            symname);
    delete[] symname;

    (*sp)->cntC.printReport(fp, this, rankingTable[i], totalTimerHits);

    fprintf(fp, "
\n"); } fprintf(fp, "
\n"); // OK, Now we want to print the flat profile. To do this we resort on // the hit count. // Cut-N-Paste Shell sort from above. The Ranking Table has already been // populated, so we do not have to reinitialize it. for (mx = usefulSymbols / 9, h = 581130733; h > 0; h /= 3) { if (h < mx) { for (i = h - 1; i < usefulSymbols; i++) { int j, tmp = rankingTable[i], val = externalSymbols[tmp]->timerHit; for (j = i; (j >= h) && (externalSymbols[rankingTable[j - h]]->timerHit < val); j -= h) { rankingTable[j] = rankingTable[j - h]; } rankingTable[j] = tmp; } } } // Pre-count up total counter hits, to get a percentage. // I wanted the total before walking the list, if this // double-pass over externalSymbols gets slow we can // do single-pass and print this out after the loop finishes. totalTimerHits = 0; for (i = 0; i < usefulSymbols && externalSymbols[rankingTable[i]]->timerHit > 0; i++) { Symbol** sp = &externalSymbols[rankingTable[i]]; totalTimerHits += (*sp)->timerHit; } if (totalTimerHits == 0) totalTimerHits = 1; if (totalTimerHits != count) fprintf(stderr, "Hit count mismatch: count=%d; totalTimerHits=%d", count, totalTimerHits); fprintf(fp, "

Flat Profile


\n", thread); fprintf(fp, "
\n");

  fprintf(fp, "Total hit count: %d\n", totalTimerHits);
  fprintf(fp, "Count %%Total  Function Name\n");
  // Now loop for as long as we have timer hits
  for (i = 0;
       i < usefulSymbols && externalSymbols[rankingTable[i]]->timerHit > 0;
       i++) {
    Symbol** sp = &externalSymbols[rankingTable[i]];

    char* symname = htmlify((*sp)->name);
    fprintf(fp, "%3d   %-2.1f     %s\n", rankingTable[i],
            (*sp)->timerHit,
            ((float)(*sp)->timerHit / (float)totalTimerHits) * 100.0, symname);
    delete[] symname;
  }
}

void leaky::analyze(int thread) {
  int* countArray = new int[usefulSymbols];
  int* flagArray = new int[usefulSymbols];

  // Zero our function call counter
  memset(countArray, 0, sizeof(countArray[0]) * usefulSymbols);

  // reset hit counts
  for (int i = 0; i < usefulSymbols; i++) {
    externalSymbols[i]->timerHit = 0;
    externalSymbols[i]->regClear();
  }

  // The flag array is used to prevent counting symbols multiple times
  // if functions are called recursively.  In order to keep from having
  // to zero it on each pass through the loop, we mark it with the value
  // of stacks on each trip through the loop.  This means we can determine
  // if we have seen this symbol for this stack trace w/o having to reset
  // from the prior stacktrace.
  memset(flagArray, -1, sizeof(flagArray[0]) * usefulSymbols);

  if (cleo) fprintf(outputfd, "m-Start\n");

  // This loop walks through all the call stacks we recorded
  // --last, --start and --end can restrict it, as can excludes/includes
  stacks = 0;
  for (malloc_log_entry* lep = firstLogEntry; lep < lastLogEntry;
       lep = reinterpret_cast(&lep->pcs[lep->numpcs])) {
    if ((thread != 0 && lep->thread != thread) || excluded(lep) ||
        !included(lep)) {
      continue;
    }

    ++stacks;  // How many stack frames did we collect

    u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth;
    char** pcp = &lep->pcs[n - 1];
    int idx = -1, parrentIdx = -1;  // Init idx incase n==0
    if (cleo) {
      // This loop walks through every symbol in the call stack.  By walking it
      // backwards we know who called the function when we get there.
      char type = 's';
      for (int i = n - 1; i >= 0; --i, --pcp) {
        idx = findSymbolIndex(reinterpret_cast(*pcp));

        if (idx >= 0) {
          // Skip over bogus __restore_rt frames that realtime profiling
          // can introduce.
          if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) {
            --pcp;
            --i;
            idx = findSymbolIndex(reinterpret_cast(*pcp));
            if (idx < 0) {
              continue;
            }
          }
          Symbol** sp = &externalSymbols[idx];
          char* symname = htmlify((*sp)->name);
          fprintf(outputfd, "%c-%s\n", type, symname);
          delete[] symname;
        }
        // else can't find symbol - ignore
        type = 'c';
      }
    } else {
      // This loop walks through every symbol in the call stack.  By walking it
      // backwards we know who called the function when we get there.
      for (int i = n - 1; i >= 0; --i, --pcp) {
        idx = findSymbolIndex(reinterpret_cast(*pcp));

        if (idx >= 0) {
          // Skip over bogus __restore_rt frames that realtime profiling
          // can introduce.
          if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) {
            --pcp;
            --i;
            idx = findSymbolIndex(reinterpret_cast(*pcp));
            if (idx < 0) {
              continue;
            }
          }

          // If we have not seen this symbol before count it and mark it as seen
          if (flagArray[idx] != stacks && ((flagArray[idx] = stacks) || true)) {
            ++countArray[idx];
          }

          // We know who we are and we know who our parrent is.  Count this
          if (parrentIdx >= 0) {
            externalSymbols[parrentIdx]->regChild(idx);
            externalSymbols[idx]->regParrent(parrentIdx);
          }
          // inside if() so an unknown in the middle of a stack won't break
          // the link!
          parrentIdx = idx;
        }
      }

      // idx should be the function that we were in when we received the signal.
      if (idx >= 0) {
        ++externalSymbols[idx]->timerHit;
      }
    }
  }
  if (!cleo) generateReportHTML(outputfd, countArray, stacks, thread);
}

void FunctionCount::printReport(FILE* fp, leaky* lk, int parent, int total) {
  const char* fmt =
      "                      %8d (%3.1f%%)%s %s%s\n";

  int nmax, tmax = ((~0U) >> 1);

  do {
    nmax = 0;
    for (int j = getSize(); --j >= 0;) {
      int cnt = getCount(j);
      if (cnt == tmax) {
        int idx = getIndex(j);
        char* symname = htmlify(lk->indexToName(idx));
        fprintf(fp, fmt, idx, getCount(j), getCount(j) * 100.0 / total,
                getCount(j) * 100.0 / total >= 10.0 ? "" : " ", symname,
                parent == idx ? " (self)" : "");
        delete[] symname;
      } else if (cnt < tmax && cnt > nmax) {
        nmax = cnt;
      }
    }
  } while ((tmax = nmax) > 0);
}