#!/usr/bin/env python3
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""This script analyzes a JSON file emitted by DMD."""

import argparse
import collections
import gzip
import io
import json
import os
import platform
import re
import shutil
import sys
import tempfile
from bisect import bisect_right

# The DMD output version this script handles.
outputVersion = 5

# If --ignore-alloc-fns is specified, stack frames containing functions that
# match these strings will be removed from the *start* of stack traces. (Once
# we hit a non-matching frame, any subsequent frames won't be removed even if
# they do match.)
allocatorFns = [
    # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc,
    # pod_malloc, malloc_zone_*, g_malloc.
    "malloc",
    # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc,
    # pod_calloc, malloc_zone_calloc, pod_callocCanGC.
    "calloc",
    # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc,
    # pod_realloc, pod_reallocCanGC.
    "realloc",
    # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign,
    # moz_xmemalign, vpx_memalign, malloc_zone_memalign.
    "memalign",
    "operator new(",
    "operator new[](",
    "g_slice_alloc",
    # This one necessary to fully filter some sequences of allocation functions
    # that happen in practice. Note that ??? entries that follow non-allocation
    # functions won't be stripped, as explained above.
    "???",
]


class Record(object):
    """A record is an aggregation of heap blocks that have identical stack
    traces. It can also be used to represent the difference between two
    records."""

    def __init__(self):
        self.numBlocks = 0
        self.reqSize = 0
        self.slopSize = 0
        self.usableSize = 0
        self.allocatedAtDesc = None
        self.reportedAtDescs = []
        self.usableSizes = collections.defaultdict(int)

    def isZero(self, args):
        """Return True if the record is empty in every measured dimension.

        |args| is unused but kept for call-site compatibility."""
        return (
            self.numBlocks == 0
            and self.reqSize == 0
            and self.slopSize == 0
            and self.usableSize == 0
            and len(self.usableSizes) == 0
        )

    def negate(self):
        """Negate all counts/sizes in place; used when a record appears only
        in the first of two diffed files."""
        self.numBlocks = -self.numBlocks
        self.reqSize = -self.reqSize
        self.slopSize = -self.slopSize
        self.usableSize = -self.usableSize

        # A negated |usableSizes| entry is represented by negating the size
        # key (counts stay positive).
        negatedUsableSizes = collections.defaultdict(int)
        for usableSize, count in self.usableSizes.items():
            negatedUsableSizes[-usableSize] = count
        self.usableSizes = negatedUsableSizes

    def subtract(self, r):
        """Subtract |r| from this record in place (self -= r).

        Note: this mutates |r.usableSizes| as a side effect (matched entries
        are deleted from it), which is fine for the diffing use case."""
        # We should only be calling this on records with matching stack traces.
        # Check this.
        assert self.allocatedAtDesc == r.allocatedAtDesc
        assert self.reportedAtDescs == r.reportedAtDescs

        self.numBlocks -= r.numBlocks
        self.reqSize -= r.reqSize
        self.slopSize -= r.slopSize
        self.usableSize -= r.usableSize

        usableSizes1 = self.usableSizes
        usableSizes2 = r.usableSizes
        usableSizes3 = collections.defaultdict(int)
        for usableSize in usableSizes1:
            counts1 = usableSizes1[usableSize]
            if usableSize in usableSizes2:
                counts2 = usableSizes2[usableSize]
                del usableSizes2[usableSize]
                counts3 = counts1 - counts2
                if counts3 != 0:
                    if counts3 < 0:
                        # A negative count is represented by negating the size
                        # key instead, keeping the count positive.
                        usableSize = -usableSize
                        counts3 = -counts3
                    usableSizes3[usableSize] = counts3
            else:
                usableSizes3[usableSize] = counts1

        # Entries only present in |r| show up negated.
        for usableSize in usableSizes2:
            usableSizes3[-usableSize] = usableSizes2[usableSize]

        self.usableSizes = usableSizes3

    # The following static methods are sort-key functions for sorted(). They
    # replace the Python 2 cmp-style comparators: each returns a tuple whose
    # elements replicate the old primary/secondary comparison chain.

    @staticmethod
    def byUsableSize(r):
        # Sort by usable size, then req size.
        return (abs(r.usableSize), abs(r.reqSize))

    @staticmethod
    def byReqSize(r):
        # Sort by req size.
        return (abs(r.reqSize),)

    @staticmethod
    def bySlopSize(r):
        # Sort by slop size.
        return (abs(r.slopSize),)

    @staticmethod
    def byNumBlocks(r):
        # Sort by block counts, then by usable size (which itself ties-breaks
        # on req size).
        return (abs(r.numBlocks), abs(r.usableSize), abs(r.reqSize))


sortByChoices = {
    "usable": Record.byUsableSize,  # the default
    "req": Record.byReqSize,
    "slop": Record.bySlopSize,
    "num-blocks": Record.byNumBlocks,
}


def parseCommandLine():
    """Parse sys.argv and return the argparse namespace."""

    # 24 is the maximum number of frames that DMD will produce.
    def range_1_24(string):
        value = int(string)
        if value < 1 or value > 24:
            msg = "{:s} is not in the range 1..24".format(string)
            raise argparse.ArgumentTypeError(msg)
        return value

    description = """
Analyze heap data produced by DMD.
If one file is specified, analyze it; if two files are specified, analyze the
difference.
Input files can be gzipped.
Write to stdout unless -o/--output is specified.
Stack traces are fixed to show function names, filenames and line numbers
unless --no-fix-stacks is specified; stack fixing modifies the original file
and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment
variable is used to find breakpad symbols for stack fixing.
"""
    p = argparse.ArgumentParser(description=description)

    p.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        help="output file; stdout if unspecified",
    )

    p.add_argument(
        "-f",
        "--max-frames",
        type=range_1_24,
        default=8,
        help="maximum number of frames to consider in each trace",
    )

    p.add_argument(
        "-s",
        "--sort-by",
        choices=list(sortByChoices.keys()),
        default="usable",
        help="sort the records by a particular metric",
    )

    p.add_argument(
        "-a",
        "--ignore-alloc-fns",
        action="store_true",
        help="ignore allocation functions at the start of traces",
    )

    p.add_argument("--no-fix-stacks", action="store_true", help="do not fix stacks")

    p.add_argument(
        "--clamp-contents",
        action="store_true",
        help="for a scan mode log, clamp addresses to the start of live blocks, "
        "or zero if not in one",
    )

    p.add_argument(
        "--print-clamp-stats",
        action="store_true",
        help="print information about the results of pointer clamping; mostly "
        "useful for debugging clamping",
    )

    p.add_argument(
        "--filter-stacks-for-testing",
        action="store_true",
        help="filter stack traces; only useful for testing purposes",
    )

    p.add_argument(
        "--allocation-filter",
        help="Only print entries that have a stack that matches the filter",
    )

    p.add_argument("input_file", help="a file produced by DMD")

    p.add_argument(
        "input_file2",
        nargs="?",
        help="a file produced by DMD; if present, it is diff'd with input_file",
    )

    return p.parse_args(sys.argv[1:])


# Fix stacks if necessary: first write the output to a tempfile, then replace
# the original file with it.
def fixStackTraces(inputFilename, isZipped, opener):
    """Run the stack-fixing module over every line of |inputFilename|,
    rewriting the file in place (via a temporary file)."""
    # This append() call is needed to make the import statements work when this
    # script is installed as a symlink.
    sys.path.append(os.path.dirname(__file__))

    bpsyms = os.environ.get("BREAKPAD_SYMBOLS_PATH", None)
    sysname = platform.system()
    if bpsyms and os.path.exists(bpsyms):
        import fix_stacks as fixModule

        def fix(line):
            return fixModule.fixSymbols(line, jsonMode=True, breakpadSymsDir=bpsyms)

    elif sysname in ("Linux", "Darwin", "Windows"):
        import fix_stacks as fixModule

        def fix(line):
            return fixModule.fixSymbols(line, jsonMode=True)

    else:
        # No stack fixing is available on this platform.
        return

    # Fix stacks, writing output to a temporary file, and then overwrite the
    # original file.
    tmpFile = tempfile.NamedTemporaryFile(delete=False)

    # If the input is gzipped, then the output (written initially to |tmpFile|)
    # should be gzipped as well.
    #
    # And we want to set its pre-gzipped filename to '' rather than the name of
    # the temporary file, so that programs like the Unix 'file' utility don't
    # say that it was called 'tmp6ozTxE' (or something like that) before it was
    # zipped. So that explains the |filename=''| parameter.
    #
    # But setting the filename like that clobbers |tmpFile.name|, so we must
    # get that now in order to move |tmpFile| at the end.
    tmpFilename = tmpFile.name
    if isZipped:
        tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb")

    # The input is opened in binary mode (it may be gzipped), so decode each
    # line for the fixer and re-encode its output before writing.
    with opener(inputFilename, "rb") as inputFile:
        for line in inputFile:
            tmpFile.write(fix(line.decode("utf-8")).encode("utf-8"))

    tmpFile.close()

    shutil.move(tmpFilename, inputFilename)


def getDigestFromFile(args, inputFile):
    """Parse one DMD JSON file and aggregate its blocks into a digest dict."""
    # Handle gzipped input if necessary.
    isZipped = inputFile.endswith(".gz")
    opener = gzip.open if isZipped else open

    # Fix stack traces unless otherwise instructed.
    if not args.no_fix_stacks:
        fixStackTraces(inputFile, isZipped, opener)

    if args.clamp_contents:
        clampBlockList(args, inputFile, isZipped, opener)

    with opener(inputFile, "rb") as f:
        j = json.load(f)

    if j["version"] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    # Extract the main parts of the JSON object.
    invocation = j["invocation"]
    dmdEnvVar = invocation["dmdEnvVar"]
    mode = invocation["mode"]
    blockList = j["blockList"]
    traceTable = j["traceTable"]
    frameTable = j["frameTable"]

    # Insert the necessary entries for unrecorded stack traces. Note that 'ut'
    # and 'uf' will not overlap with any keys produced by DMD's
    # ToIdStringConverter::Base32() function.
    unrecordedTraceID = "ut"
    unrecordedFrameID = "uf"
    traceTable[unrecordedTraceID] = [unrecordedFrameID]
    frameTable[
        unrecordedFrameID
    ] = "#00: (no stack trace recorded due to --stacks=partial)"

    # For the purposes of this script, 'scan' behaves like 'live'.
    if mode == "scan":
        mode = "live"

    if mode not in ["live", "dark-matter", "cumulative"]:
        raise Exception("bad 'mode' property: '{:s}'".format(mode))

    # Remove allocation functions at the start of traces.
    if args.ignore_alloc_fns:
        # Build a regexp that matches every function in allocatorFns.
        fn_re = re.compile("|".join(re.escape(fn) for fn in allocatorFns))

        # Remove allocator fns from each stack trace.
        for traceKey, frameKeys in traceTable.items():
            numSkippedFrames = 0
            for frameKey in frameKeys:
                frameDesc = frameTable[frameKey]
                if re.search(fn_re, frameDesc):
                    numSkippedFrames += 1
                else:
                    break
            if numSkippedFrames > 0:
                traceTable[traceKey] = frameKeys[numSkippedFrames:]

    # Trim the number of frames.
    for traceKey, frameKeys in traceTable.items():
        if len(frameKeys) > args.max_frames:
            traceTable[traceKey] = frameKeys[: args.max_frames]

    def buildTraceDescription(traceTable, frameTable, traceKey):
        """Return the list of printable frame lines for one stack trace."""
        frameKeys = traceTable[traceKey]
        fmt = "    #{:02d}{:}"

        if args.filter_stacks_for_testing:
            # This option is used by `test_dmd.js`, which runs the code in
            # `SmokeDMD.cpp`. When running that test, there is too much
            # variation in the stack traces across different machines and
            # platforms to do exact output matching. However, every stack trace
            # should have at least three frames that contain `DMD` (in one of
            # `DMD.cpp`, `SmokeDMD.cpp`, `SmokeDMD`, or `SmokeDMD.exe`). Some
            # example frames from automation (where `..` indicates excised path
            # segments):
            #
            # Linux debug, with stack fixing using breakpad syms:
            # `#01: replace_realloc(void*, unsigned long) [../dmd/DMD.cpp:1110]`
            #
            # Linux opt, with native stack fixing:
            # `#02: TestFull(char const*, int, char const*, int) (../dmd/test/SmokeDMD.cpp:165)`
            #
            # Mac opt, with native stack fixing:
            # `#03: RunTests() (../build/tests/bin/SmokeDMD + 0x21f9)`
            #
            # Windows opt, with native stack fixing failing due to a missing PDB:
            # `#04: ??? (..\\build\\tests\\bin\\SmokeDMD.exe + 0x1c58)`
            #
            # If we see three such frames, we replace the entire stack trace
            # with a single, predictable frame. This imprecise matching will at
            # least detect if stack fixing fails completely.
            dmd_frame_matches = 0
            for frameKey in frameKeys:
                frameDesc = frameTable[frameKey]
                if "DMD" in frameDesc:
                    dmd_frame_matches += 1
                    if dmd_frame_matches >= 3:
                        return [fmt.format(1, ": ... DMD.cpp ...")]

        # The frame number is always '#00' (see DMD.h for why), so we have to
        # replace that with the correct frame number.
        desc = []
        for n, frameKey in enumerate(traceTable[traceKey], start=1):
            desc.append(fmt.format(n, frameTable[frameKey][3:]))
        return desc

    # Aggregate blocks into records. All sufficiently similar blocks go into a
    # single record.

    if mode in ["live", "cumulative"]:
        liveOrCumulativeRecords = collections.defaultdict(Record)
    elif mode == "dark-matter":
        unreportedRecords = collections.defaultdict(Record)
        onceReportedRecords = collections.defaultdict(Record)
        twiceReportedRecords = collections.defaultdict(Record)

    heapUsableSize = 0
    heapBlocks = 0

    recordKeyPartCache = {}

    # For each block we compute a |recordKey|, and all blocks with the same
    # |recordKey| are aggregated into a single record. The |recordKey| is
    # derived from the block's 'alloc' and 'reps' (if present) stack
    # traces.
    #
    # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing
    # traces for equality. We can't use trace keys or frame keys because
    # they're not comparable across different DMD runs (which is relevant
    # when doing diffs).
    #
    # Using frame descriptions also fits in with the stack trimming done
    # for --max-frames, which requires that stack traces with common
    # beginnings but different endings to be considered equivalent. E.g. if
    # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4]
    # and we trim the final frame of each they should be considered
    # equivalent because the untrimmed frame descriptions (D1 and D2)
    # match.
    #
    # Having said all that, during a single invocation of dmd.py on a
    # single DMD file, for a single frameKey value the record key will
    # always be the same, and we might encounter it 1000s of times. So we
    # cache prior results for speed.
    def makeRecordKeyPart(traceKey):
        if traceKey in recordKeyPartCache:
            return recordKeyPartCache[traceKey]

        # A concrete list (not a lazy iterator) must be stringified here so
        # that equal traces produce equal keys.
        recordKeyPart = str(
            [frameTable[frameKey] for frameKey in traceTable[traceKey]]
        )
        recordKeyPartCache[traceKey] = recordKeyPart
        return recordKeyPart

    for block in blockList:
        allocatedAtTraceKey = block.get("alloc", unrecordedTraceID)
        if mode in ["live", "cumulative"]:
            recordKey = makeRecordKeyPart(allocatedAtTraceKey)
            records = liveOrCumulativeRecords
        elif mode == "dark-matter":
            recordKey = makeRecordKeyPart(allocatedAtTraceKey)
            if "reps" in block:
                reportedAtTraceKeys = block["reps"]
                for reportedAtTraceKey in reportedAtTraceKeys:
                    recordKey += makeRecordKeyPart(reportedAtTraceKey)
                if len(reportedAtTraceKeys) == 1:
                    records = onceReportedRecords
                else:
                    records = twiceReportedRecords
            else:
                records = unreportedRecords

        record = records[recordKey]

        if "req" not in block:
            raise Exception("'req' property missing in block")

        reqSize = block["req"]
        slopSize = block.get("slop", 0)

        if "num" in block:
            num = block["num"]
        else:
            num = 1

        usableSize = reqSize + slopSize
        heapUsableSize += num * usableSize
        heapBlocks += num

        record.numBlocks += num
        record.reqSize += num * reqSize
        record.slopSize += num * slopSize
        record.usableSize += num * usableSize
        if record.allocatedAtDesc is None:
            record.allocatedAtDesc = buildTraceDescription(
                traceTable, frameTable, allocatedAtTraceKey
            )

        if mode in ["live", "cumulative"]:
            pass
        elif mode == "dark-matter":
            if "reps" in block and record.reportedAtDescs == []:

                def f(k):
                    return buildTraceDescription(traceTable, frameTable, k)

                # Must be a concrete list: |Record.subtract| compares these
                # with ==.
                record.reportedAtDescs = [f(k) for k in reportedAtTraceKeys]
        record.usableSizes[usableSize] += num

    # All the processed data for a single DMD file is called a "digest".
    digest = {}
    digest["dmdEnvVar"] = dmdEnvVar
    digest["mode"] = mode
    digest["heapUsableSize"] = heapUsableSize
    digest["heapBlocks"] = heapBlocks
    if mode in ["live", "cumulative"]:
        digest["liveOrCumulativeRecords"] = liveOrCumulativeRecords
    elif mode == "dark-matter":
        digest["unreportedRecords"] = unreportedRecords
        digest["onceReportedRecords"] = onceReportedRecords
        digest["twiceReportedRecords"] = twiceReportedRecords
    return digest


def diffRecords(args, records1, records2):
    """Return a record dict representing records2 - records1, keyed like the
    inputs. Both inputs may be mutated."""
    records3 = {}

    # Process records1.
    for k in records1:
        r1 = records1[k]
        if k in records2:
            # This record is present in both records1 and records2.
            r2 = records2[k]
            del records2[k]
            r2.subtract(r1)
            if not r2.isZero(args):
                records3[k] = r2
        else:
            # This record is present only in records1; it shows up negated.
            r1.negate()
            records3[k] = r1

    # Process remaining records2.
    for k in records2:
        # This record is present only in records2.
        records3[k] = records2[k]

    return records3


def diffDigests(args, d1, d2):
    """Return a digest representing d2 - d1."""
    if d1["mode"] != d2["mode"]:
        raise Exception("the input files have different 'mode' properties")

    d3 = {}
    d3["dmdEnvVar"] = (d1["dmdEnvVar"], d2["dmdEnvVar"])
    d3["mode"] = d1["mode"]
    d3["heapUsableSize"] = d2["heapUsableSize"] - d1["heapUsableSize"]
    d3["heapBlocks"] = d2["heapBlocks"] - d1["heapBlocks"]
    if d1["mode"] in ["live", "cumulative"]:
        d3["liveOrCumulativeRecords"] = diffRecords(
            args, d1["liveOrCumulativeRecords"], d2["liveOrCumulativeRecords"]
        )
    elif d1["mode"] == "dark-matter":
        d3["unreportedRecords"] = diffRecords(
            args, d1["unreportedRecords"], d2["unreportedRecords"]
        )
        d3["onceReportedRecords"] = diffRecords(
            args, d1["onceReportedRecords"], d2["onceReportedRecords"]
        )
        d3["twiceReportedRecords"] = diffRecords(
            args, d1["twiceReportedRecords"], d2["twiceReportedRecords"]
        )
    return d3


def printDigest(args, digest):
    """Print a digest (or diffed digest) to args.output (stdout by default)."""
    dmdEnvVar = digest["dmdEnvVar"]
    mode = digest["mode"]
    heapUsableSize = digest["heapUsableSize"]
    heapBlocks = digest["heapBlocks"]
    if mode in ["live", "cumulative"]:
        liveOrCumulativeRecords = digest["liveOrCumulativeRecords"]
    elif mode == "dark-matter":
        unreportedRecords = digest["unreportedRecords"]
        onceReportedRecords = digest["onceReportedRecords"]
        twiceReportedRecords = digest["twiceReportedRecords"]

    separator = "#" + "-" * 65 + "\n"

    def number(n):
        """Format a number with comma as a separator."""
        return "{:,d}".format(n)

    def perc(m, n):
        return 0 if n == 0 else (100 * m / n)

    def plural(n):
        return "" if n == 1 else "s"

    # Prints to stdout, or to file if -o/--output was specified.
    def out(*arguments, **kwargs):
        print(*arguments, file=args.output, **kwargs)

    def printStack(traceDesc):
        for frameDesc in traceDesc:
            out(frameDesc)

    def printRecords(recordKind, records, heapUsableSize):
        RecordKind = recordKind.capitalize()
        out(separator)
        # NOTE: |numRecords| is deliberately computed before any
        # --allocation-filter filtering, preserving the "record i of N"
        # numbering against the full record set.
        numRecords = len(records)
        sortKey = sortByChoices[args.sort_by]
        sortedRecords = sorted(records.values(), key=sortKey, reverse=True)
        kindBlocks = 0
        kindUsableSize = 0
        maxRecord = 1000

        if args.allocation_filter:
            sortedRecords = [
                record
                for record in sortedRecords
                if any(
                    args.allocation_filter in frameDesc
                    for frameDesc in record.allocatedAtDesc
                )
            ]

        # First iteration: get totals, etc.
        for record in sortedRecords:
            kindBlocks += record.numBlocks
            kindUsableSize += record.usableSize

        # Second iteration: print.
        if numRecords == 0:
            out("# no {:} heap blocks\n".format(recordKind))

        kindCumulativeUsableSize = 0
        for i, record in enumerate(sortedRecords, start=1):
            # Stop printing at the |maxRecord|th record.
            if i == maxRecord:
                out(
                    "# {:}: stopping after {:,d} heap block records\n".format(
                        RecordKind, i
                    )
                )
                break

            kindCumulativeUsableSize += record.usableSize

            out(RecordKind + " {")
            out(
                "  {:} block{:} in heap block record {:,d} of {:,d}".format(
                    number(record.numBlocks), plural(record.numBlocks), i, numRecords
                )
            )
            out(
                "  {:} bytes ({:} requested / {:} slop)".format(
                    number(record.usableSize),
                    number(record.reqSize),
                    number(record.slopSize),
                )
            )

            usableSizes = sorted(
                record.usableSizes.items(), key=lambda x: abs(x[0]), reverse=True
            )
            hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1

            if not hasSingleBlock:
                out("  Individual block sizes: ", end="")
                if len(usableSizes) == 0:
                    out("(no change)", end="")
                else:
                    isFirst = True
                    for usableSize, count in usableSizes:
                        if not isFirst:
                            out("; ", end="")
                        out("{:}".format(number(usableSize)), end="")
                        if count > 1:
                            out(" x {:,d}".format(count), end="")
                        isFirst = False
                out()

            out(
                "  {:4.2f}% of the heap ({:4.2f}% cumulative)".format(
                    perc(record.usableSize, heapUsableSize),
                    perc(kindCumulativeUsableSize, heapUsableSize),
                )
            )
            if mode in ["live", "cumulative"]:
                pass
            elif mode == "dark-matter":
                out(
                    "  {:4.2f}% of {:} ({:4.2f}% cumulative)".format(
                        perc(record.usableSize, kindUsableSize),
                        recordKind,
                        perc(kindCumulativeUsableSize, kindUsableSize),
                    )
                )
            out("  Allocated at {")
            printStack(record.allocatedAtDesc)
            out("  }")
            if mode in ["live", "cumulative"]:
                pass
            elif mode == "dark-matter":
                for n, reportedAtDesc in enumerate(record.reportedAtDescs):
                    again = "again " if n > 0 else ""
                    out("  Reported {:}at {{".format(again))
                    printStack(reportedAtDesc)
                    out("  }")
            out("}\n")

        return (kindUsableSize, kindBlocks)

    def printInvocation(n, dmdEnvVar, mode):
        out("Invocation{:} {{".format(n))
        if dmdEnvVar is None:
            out("  $DMD is undefined")
        else:
            out("  $DMD = '" + dmdEnvVar + "'")
        out("  Mode = '" + mode + "'")
        out("}\n")

    # Print command line. Strip dirs so the output is deterministic, which is
    # needed for testing.
    out(separator, end="")
    out("# " + " ".join(map(os.path.basename, sys.argv)) + "\n")

    # Print invocation(s). A tuple means we're printing a diff of two files.
    if type(dmdEnvVar) is not tuple:
        printInvocation("", dmdEnvVar, mode)
    else:
        printInvocation(" 1", dmdEnvVar[0], mode)
        printInvocation(" 2", dmdEnvVar[1], mode)

    # Print records.
    if mode in ["live", "cumulative"]:
        liveOrCumulativeUsableSize, liveOrCumulativeBlocks = printRecords(
            mode, liveOrCumulativeRecords, heapUsableSize
        )
    elif mode == "dark-matter":
        twiceReportedUsableSize, twiceReportedBlocks = printRecords(
            "twice-reported", twiceReportedRecords, heapUsableSize
        )

        unreportedUsableSize, unreportedBlocks = printRecords(
            "unreported", unreportedRecords, heapUsableSize
        )

        onceReportedUsableSize, onceReportedBlocks = printRecords(
            "once-reported", onceReportedRecords, heapUsableSize
        )

    # Print summary.
    out(separator)
    out("Summary {")
    if mode in ["live", "cumulative"]:
        out(
            "  Total: {:} bytes in {:} blocks".format(
                number(liveOrCumulativeUsableSize), number(liveOrCumulativeBlocks)
            )
        )
    elif mode == "dark-matter":
        fmt = "  {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)"
        out(fmt.format("Total:", number(heapUsableSize), 100, number(heapBlocks), 100))
        out(
            fmt.format(
                "Unreported:",
                number(unreportedUsableSize),
                perc(unreportedUsableSize, heapUsableSize),
                number(unreportedBlocks),
                perc(unreportedBlocks, heapBlocks),
            )
        )
        out(
            fmt.format(
                "Once-reported:",
                number(onceReportedUsableSize),
                perc(onceReportedUsableSize, heapUsableSize),
                number(onceReportedBlocks),
                perc(onceReportedBlocks, heapBlocks),
            )
        )
        out(
            fmt.format(
                "Twice-reported:",
                number(twiceReportedUsableSize),
                perc(twiceReportedUsableSize, heapUsableSize),
                number(twiceReportedBlocks),
                perc(twiceReportedBlocks, heapBlocks),
            )
        )
    out("}\n")


#############################
# Pretty printer for DMD JSON
#############################
def prettyPrintDmdJson(out, j):
    """Write the DMD JSON object |j| to text stream |out| in the one-entry-
    per-line layout that DMD itself produces."""
    out.write("{\n")

    out.write('  "version": {0},\n'.format(j["version"]))
    out.write('  "invocation": ')
    json.dump(j["invocation"], out, sort_keys=True)
    out.write(",\n")

    out.write('  "blockList": [')
    first = True
    for b in j["blockList"]:
        out.write("" if first else ",")
        out.write("\n  ")
        json.dump(b, out, sort_keys=True)
        first = False
    out.write("\n  ],\n")

    out.write('  "traceTable": {')
    first = True
    for k, l in j["traceTable"].items():
        out.write("" if first else ",")
        out.write('\n  "{0}": {1}'.format(k, json.dumps(l)))
        first = False
    out.write("\n  },\n")

    out.write('  "frameTable": {')
    first = True
    for k, v in j["frameTable"].items():
        out.write("" if first else ",")
        out.write('\n  "{0}": {1}'.format(k, json.dumps(v)))
        first = False
    out.write("\n  }\n")

    out.write("}\n")


##################################################################
# Code for clamping addresses using conservative pointer analysis.
##################################################################

# Start is the address of the first byte of the block, while end is
# the address of the first byte after the final byte in the block.
class AddrRange:
    def __init__(self, block, length):
        # |block| is the block's address as a hex string; |start|/|end| are
        # the corresponding numeric [start, end) range.
        self.block = block
        self.start = int(block, 16)
        self.length = length
        self.end = self.start + self.length

        assert self.start > 0
        assert length >= 0


class ClampStats:
    """Counters describing what pointer clamping did; see log()."""

    def __init__(self):
        # Number of pointers already pointing to the start of a block.
        self.startBlockPtr = 0

        # Number of pointers pointing to the middle of a block. These
        # are clamped to the start of the block they point into.
        self.midBlockPtr = 0

        # Number of null pointers.
        self.nullPtr = 0

        # Number of non-null pointers that didn't point into the middle
        # of any blocks. These are clamped to null.
        self.nonNullNonBlockPtr = 0

    def clampedBlockAddr(self, sameAddress):
        if sameAddress:
            self.startBlockPtr += 1
        else:
            self.midBlockPtr += 1

    def nullAddr(self):
        self.nullPtr += 1

    def clampedNonBlockAddr(self):
        self.nonNullNonBlockPtr += 1

    def log(self):
        sys.stderr.write("Results:\n")
        sys.stderr.write(
            "  Number of pointers already pointing to start of blocks: "
            + str(self.startBlockPtr)
            + "\n"
        )
        sys.stderr.write(
            "  Number of pointers clamped to start of blocks: "
            + str(self.midBlockPtr)
            + "\n"
        )
        sys.stderr.write(
            "  Number of non-null pointers not pointing into blocks "
            "clamped to null: " + str(self.nonNullNonBlockPtr) + "\n"
        )
        sys.stderr.write(
            "  Number of null pointers: " + str(self.nullPtr) + "\n"
        )


# Search the block ranges array for a block that address points into.
# The search is carried out in an array of starting addresses for each blocks
# because it is faster.
def clampAddress(blockRanges, blockStarts, clampStats, address):
    """Return |address| clamped to the start of the block it points into
    (as a hex string), or "0" if it points into no block. The caller must
    have already excluded addresses outside [firstAddr, lastAddr)."""
    i = bisect_right(blockStarts, address)

    # Any addresses completely out of the range should have been eliminated
    # already.
    assert i > 0
    r = blockRanges[i - 1]
    assert r.start <= address

    if address >= r.end:
        # |address| falls in the gap after block |i - 1|.
        assert address < blockRanges[i].start
        clampStats.clampedNonBlockAddr()
        return "0"

    clampStats.clampedBlockAddr(r.start == address)
    return r.block


def clampBlockList(args, inputFileName, isZipped, opener):
    """Clamp every scanned pointer in a scan-mode log to the start of the
    live block it points into (or null), rewriting the file in place."""
    # XXX This isn't very efficient because we end up reading and writing
    # the file multiple times.
    with opener(inputFileName, "rb") as f:
        j = json.load(f)

    if j["version"] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    # Check that the invocation is reasonable for contents clamping.
    invocation = j["invocation"]
    if invocation["mode"] != "scan":
        raise Exception("Log was taken in mode " + invocation["mode"] + " not scan")

    sys.stderr.write("Creating block range list.\n")
    blockList = j["blockList"]
    blockRanges = []
    for block in blockList:
        blockRanges.append(AddrRange(block["addr"], block["req"]))
    blockRanges.sort(key=lambda r: r.start)

    # Make sure there are no overlapping blocks.
    prevRange = blockRanges[0]
    for currRange in blockRanges[1:]:
        assert prevRange.end <= currRange.start
        prevRange = currRange

    sys.stderr.write("Clamping block contents.\n")
    clampStats = ClampStats()
    firstAddr = blockRanges[0].start
    lastAddr = blockRanges[-1].end

    blockStarts = []
    for r in blockRanges:
        blockStarts.append(r.start)

    for block in blockList:
        # Small blocks don't have any contents.
        if "contents" not in block:
            continue

        cont = block["contents"]
        for i in range(len(cont)):
            address = int(cont[i], 16)

            if address == 0:
                clampStats.nullAddr()
                continue

            # If the address is before the first block or after the last
            # block then it can't be within a block.
            if address < firstAddr or address >= lastAddr:
                clampStats.clampedNonBlockAddr()
                cont[i] = "0"
                continue

            cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address)

        # Remove any trailing nulls.
        while len(cont) and cont[-1] == "0":
            cont.pop()

    if args.print_clamp_stats:
        clampStats.log()

    sys.stderr.write("Saving file.\n")

    # See fixStackTraces for an explanation of the tempfile/gzip dance. The
    # temp file is binary, so wrap it in a text layer for the pretty printer.
    rawTmpFile = tempfile.NamedTemporaryFile(delete=False)
    tmpFilename = rawTmpFile.name
    tmpFile = rawTmpFile
    if isZipped:
        tmpFile = gzip.GzipFile(filename="", fileobj=rawTmpFile, mode="wb")

    with io.TextIOWrapper(tmpFile, encoding="utf-8") as textFile:
        prettyPrintDmdJson(textFile, j)

    if isZipped:
        # Closing the TextIOWrapper closed the GzipFile but not the
        # underlying temp file; close it so the move below is safe.
        rawTmpFile.close()

    shutil.move(tmpFilename, inputFileName)


def main():
    args = parseCommandLine()
    digest = getDigestFromFile(args, args.input_file)
    if args.input_file2:
        digest2 = getDigestFromFile(args, args.input_file2)
        digest = diffDigests(args, digest, digest2)
    printDigest(args, digest)


if __name__ == "__main__":
    main()