Adding upstream version 1:115.7.0.upstream/1%115.7.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit: 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree: a68f146d7fa01f0134297619fbe7e33db084e0aa /memory/replace/dmd/dmd.py
parent: Initial commit. (diff)
download: thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
1 files changed, 1027 insertions, 0 deletions
diff --git a/memory/replace/dmd/dmd.py b/memory/replace/dmd/dmd.py
new file mode 100755
index 0000000000..ae772d15ff
--- /dev/null
+++ b/memory/replace/dmd/dmd.py
@@ -0,0 +1,1027 @@
+#! /usr/bin/env python3
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""This script analyzes a JSON file emitted by DMD."""
+
+import argparse
+import collections
+import gzip
+import io
+import json
+import os
+import platform
+import re
+import shutil
+import sys
+import tempfile
+from bisect import bisect_right
+from functools import cmp_to_key
+from typing import Callable
+
+# The DMD output version this script handles. getDigestFromFile() raises an
+# exception for any input file whose "version" property differs from this.
+outputVersion = 5
+
+# If --ignore-alloc-fns is specified, stack frames containing functions that
+# match these strings will be removed from the *start* of stack traces. (Once
+# we hit a non-matching frame, any subsequent frames won't be removed even if
+# they do match.)
+#
+# Each entry is matched as a literal substring rather than as a regular
+# expression: getDigestFromFile() passes every entry through re.escape()
+# before joining them into one alternation that is searched for in each frame
+# description.
+allocatorFns = [
+    # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc,
+    # pod_malloc, malloc_zone_*, g_malloc.
+    "malloc",
+    # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc,
+    # pod_calloc, malloc_zone_calloc, pod_callocCanGC.
+    "calloc",
+    # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc,
+    # pod_realloc, pod_reallocCanGC.
+    "realloc",
+    # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign,
+    # moz_xmemalign, vpx_memalign, malloc_zone_memalign.
+    "memalign",
+    # The trailing "(" in the next two entries is part of the literal text that
+    # is searched for.
+    "operator new(",
+    "operator new[](",
+    # NOTE(review): presumably GLib's slice allocator -- confirm.
+    "g_slice_alloc",
+    # This one is necessary to fully filter some sequences of allocation
+    # functions that happen in practice. Note that ??? entries that follow
+    # non-allocation functions won't be stripped, as explained above.
+    "???",
+    # Match DMD internals.
+    "mozilla::dmd::AllocCallback",
+    "mozilla::dmd::StackTrace::Get",
+]
+
+
+def cmp(a, b):
+    """Three-way comparison of |a| and |b|.
+
+    Returns the "greater than" result minus the "less than" result, which for
+    ordinary ordered values is 1 when a > b, -1 when a < b and 0 otherwise.
+    """
+    isGreater = a > b
+    isLess = a < b
+    return isGreater - isLess
+
+
+class Record(object):
+    """A record is an aggregation of heap blocks that have identical stack
+    traces. It can also be used to represent the difference between two
+    records.
+
+    Attributes:
+      numBlocks, reqSize, slopSize, usableSize: running totals over the
+        aggregated blocks; they may be negative in a difference record.
+      allocatedAtDesc: list of formatted frame strings for the allocation
+        stack, or None until it has been filled in.
+      reportedAtDescs: list of such lists, one per report stack.
+      usableSizes: maps an individual block's usable size to the number of
+        blocks of that size. In a difference record a negative key denotes
+        blocks that were removed; the count itself stays positive.
+    """
+
+    def __init__(self):
+        self.numBlocks = 0
+        self.reqSize = 0
+        self.slopSize = 0
+        self.usableSize = 0
+        self.allocatedAtDesc = None
+        self.reportedAtDescs = []
+        self.usableSizes = collections.defaultdict(int)
+
+    def isZero(self, args):
+        """Return True if every total is zero and no block sizes are recorded.
+
+        |args| is accepted for interface compatibility and is not used.
+        """
+        return (
+            self.numBlocks == 0
+            and self.reqSize == 0
+            and self.slopSize == 0
+            and self.usableSize == 0
+            and not self.usableSizes
+        )
+
+    def negate(self):
+        """Flip the sign of this record in place."""
+        self.numBlocks = -self.numBlocks
+        self.reqSize = -self.reqSize
+        self.slopSize = -self.slopSize
+        self.usableSize = -self.usableSize
+
+        # Only the size keys change sign; the counts stay as they are.
+        negatedUsableSizes = collections.defaultdict(int)
+        for usableSize, count in self.usableSizes.items():
+            negatedUsableSizes[-usableSize] = count
+        self.usableSizes = negatedUsableSizes
+
+    def subtract(self, r):
+        """Subtract record |r| from this record in place.
+
+        |r| itself is left unmodified.
+        """
+        # We should only be calling this on records with matching stack traces.
+        # Check this.
+        assert self.allocatedAtDesc == r.allocatedAtDesc
+        assert self.reportedAtDescs == r.reportedAtDescs
+
+        self.numBlocks -= r.numBlocks
+        self.reqSize -= r.reqSize
+        self.slopSize -= r.slopSize
+        self.usableSize -= r.usableSize
+
+        ourSizes = self.usableSizes
+        theirSizes = r.usableSizes
+        diffSizes = collections.defaultdict(int)
+
+        for usableSize, ourCount in ourSizes.items():
+            if usableSize not in theirSizes:
+                # Only we have blocks of this size.
+                diffSizes[usableSize] = ourCount
+                continue
+
+            # Both records have blocks of this size. A net loss is recorded
+            # under the negated size with a positive count; a net change of
+            # zero is dropped entirely.
+            countDiff = ourCount - theirSizes[usableSize]
+            if countDiff > 0:
+                diffSizes[usableSize] = countDiff
+            elif countDiff < 0:
+                diffSizes[-usableSize] = -countDiff
+
+        # Sizes that only |r| has are all losses. Membership is tested against
+        # our sizes instead of deleting the handled keys from |r|, so that the
+        # argument is not destroyed as a side effect.
+        for usableSize, theirCount in theirSizes.items():
+            if usableSize not in ourSizes:
+                diffSizes[-usableSize] = theirCount
+
+        self.usableSizes = diffSizes
+
+    @staticmethod
+    def cmpByUsableSize(r1, r2):
+        # Sort by usable size, then by req size.
+        return cmp(abs(r1.usableSize), abs(r2.usableSize)) or Record.cmpByReqSize(
+            r1, r2
+        )
+
+    @staticmethod
+    def cmpByReqSize(r1, r2):
+        # Sort by req size.
+        return cmp(abs(r1.reqSize), abs(r2.reqSize))
+
+    @staticmethod
+    def cmpBySlopSize(r1, r2):
+        # Sort by slop size.
+        return cmp(abs(r1.slopSize), abs(r2.slopSize))
+
+    @staticmethod
+    def cmpByNumBlocks(r1, r2):
+        # Sort by block counts, then by usable size.
+        return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or Record.cmpByUsableSize(
+            r1, r2
+        )
+
+
+# Maps each value accepted by -s/--sort-by to the Record comparison function
+# that printDigest() uses (via cmp_to_key) to order the records it prints.
+sortByChoices = {
+    "usable": Record.cmpByUsableSize,  # the default
+    "req": Record.cmpByReqSize,
+    "slop": Record.cmpBySlopSize,
+    "num-blocks": Record.cmpByNumBlocks,
+}
+
+
+def parseCommandLine():
+    """Build this script's argument parser and parse sys.argv[1:] with it.
+
+    Returns the argparse namespace holding the parsed options and the one or
+    two input filenames.
+    """
+
+    # 24 is the maximum number of frames that DMD will produce.
+    def range_1_24(string):
+        frames = int(string)
+        if not (1 <= frames <= 24):
+            raise argparse.ArgumentTypeError(
+                "{:s} is not in the range 1..24".format(string)
+            )
+        return frames
+
+    description = """
+Analyze heap data produced by DMD.
+If one file is specified, analyze it; if two files are specified, analyze the
+difference.
+Input files can be gzipped.
+Write to stdout unless -o/--output is specified.
+Stack traces are fixed to show function names, filenames and line numbers
+unless --no-fix-stacks is specified; stack fixing modifies the original file
+and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment
+variable is used to find breakpad symbols for stack fixing.
+"""
+
+    # Each entry is (names-or-flags, keyword arguments) for add_argument(). The
+    # arguments are registered in exactly this order.
+    argumentSpecs = [
+        (
+            ("-o", "--output"),
+            dict(
+                type=argparse.FileType("w"),
+                help="output file; stdout if unspecified",
+            ),
+        ),
+        (
+            ("-f", "--max-frames"),
+            dict(
+                type=range_1_24,
+                default=8,
+                help="maximum number of frames to consider in each trace",
+            ),
+        ),
+        (
+            ("-s", "--sort-by"),
+            dict(
+                choices=sortByChoices.keys(),
+                default="usable",
+                help="sort the records by a particular metric",
+            ),
+        ),
+        (
+            ("-a", "--ignore-alloc-fns"),
+            dict(
+                action="store_true",
+                help="ignore allocation functions at the start of traces",
+            ),
+        ),
+        (
+            ("--no-fix-stacks",),
+            dict(action="store_true", help="do not fix stacks"),
+        ),
+        (
+            ("--clamp-contents",),
+            dict(
+                action="store_true",
+                help="for a scan mode log, clamp addresses to the start of live blocks, "
+                "or zero if not in one",
+            ),
+        ),
+        (
+            ("--print-clamp-stats",),
+            dict(
+                action="store_true",
+                help="print information about the results of pointer clamping; mostly "
+                "useful for debugging clamping",
+            ),
+        ),
+        (
+            ("--filter-stacks-for-testing",),
+            dict(
+                action="store_true",
+                help="filter stack traces; only useful for testing purposes",
+            ),
+        ),
+        (
+            ("--filter",),
+            dict(
+                default=[],
+                action="append",
+                help="Only print entries that have a stack that matches the filter. "
+                "A filter may be negated by prefixing it with `!`. "
+                "If multiple filters are specified, all of them must match.",
+            ),
+        ),
+        (
+            ("input_file",),
+            dict(help="a file produced by DMD"),
+        ),
+        (
+            ("input_file2",),
+            dict(
+                nargs="?",
+                help="a file produced by DMD; if present, it is diff'd with input_file",
+            ),
+        ),
+    ]
+
+    parser = argparse.ArgumentParser(description=description)
+    for namesOrFlags, settings in argumentSpecs:
+        parser.add_argument(*namesOrFlags, **settings)
+
+    return parser.parse_args(sys.argv[1:])
+
+
+# Fix stacks if necessary: first write the output to a tempfile, then replace
+# the original file with it.
+def fixStackTraces(inputFilename, isZipped, opener):
+    """Rewrite |inputFilename| in place with its stack frames fixed.
+
+    inputFilename: path of the DMD output file to fix.
+    isZipped: whether the file is gzipped; if so, the rewritten file is
+        gzipped as well.
+    opener: callable used to open the input; it is called as
+        opener(inputFilename, "rb").
+
+    Does nothing if BREAKPAD_SYMBOLS_PATH does not name an existing path and
+    the platform is not Linux, Darwin or Windows.
+    """
+    # This sys.path entry is needed to make the import statements work when
+    # this script is installed as a symlink. Only add it once, since this
+    # function is called once per input file.
+    scriptDir = os.path.dirname(__file__)
+    if scriptDir not in sys.path:
+        sys.path.append(scriptDir)
+
+    bpsyms = os.environ.get("BREAKPAD_SYMBOLS_PATH", None)
+    sysname = platform.system()
+    if bpsyms and os.path.exists(bpsyms):
+        import fix_stacks as fixModule
+
+        def fix(line):
+            return fixModule.fixSymbols(line, jsonMode=True, breakpadSymsDir=bpsyms)
+
+    elif sysname in ("Linux", "Darwin", "Windows"):
+        import fix_stacks as fixModule
+
+        def fix(line):
+            return fixModule.fixSymbols(line, jsonMode=True)
+
+    else:
+        return
+
+    # Fix stacks, writing output to a temporary file, and then overwrite the
+    # original file.
+    tmpFile = tempfile.NamedTemporaryFile(delete=False)
+    tmpFilename = tmpFile.name
+    replacedOriginal = False
+    try:
+        # Leaving this |with| block closes (and so flushes) the temporary file
+        # itself before it is moved.
+        with tmpFile:
+            # If the input is gzipped, then the output (written initially to
+            # |tmpFile|) should be gzipped as well.
+            #
+            # And we want to set its pre-gzipped filename to '' rather than the
+            # name of the temporary file, so that programs like the Unix 'file'
+            # utility don't say that it was called 'tmp6ozTxE' (or something
+            # like that) before it was zipped. So that explains the
+            # |filename=''| parameter.
+            if isZipped:
+                outFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb")
+            else:
+                outFile = tmpFile
+
+            try:
+                with opener(inputFilename, "rb") as inputFile:
+                    for line in inputFile:
+                        outFile.write(fix(line))
+            finally:
+                # GzipFile.close() finishes the gzip stream but does not close
+                # the file object it wraps; that is done by the enclosing
+                # |with| block.
+                if outFile is not tmpFile:
+                    outFile.close()
+
+        shutil.move(tmpFilename, inputFilename)
+        replacedOriginal = True
+    finally:
+        # Don't leave the temporary file behind if anything above failed.
+        if not replacedOriginal:
+            try:
+                os.unlink(tmpFilename)
+            except OSError:
+                pass
+
+
+def getDigestFromFile(args, inputFile):
+    """Load one DMD JSON file and aggregate its heap blocks into a digest.
+
+    args: the parsed command line; the no_fix_stacks, clamp_contents,
+        ignore_alloc_fns, max_frames and filter_stacks_for_testing options are
+        consulted.
+    inputFile: path of the DMD output file; a name ending in ".gz" is read as
+        gzipped.
+
+    Unless disabled by |args|, the file is first stack-fixed and/or clamped in
+    place. Blocks whose (trimmed) stack traces have identical frame
+    descriptions are then merged into Record objects.
+
+    Returns a dict with the keys "dmdEnvVar", "mode", "heapUsableSize" and
+    "heapBlocks", plus either "liveOrCumulativeRecords" (for the "live" and
+    "cumulative" modes) or "unreportedRecords", "onceReportedRecords" and
+    "twiceReportedRecords" (for the "dark-matter" mode).
+
+    Raises Exception if the file's version or mode is not supported, or if a
+    block has no "req" property.
+    """
+    # Handle gzipped input if necessary.
+    isZipped = inputFile.endswith(".gz")
+    opener = gzip.open if isZipped else open
+
+    # Fix stack traces unless otherwise instructed.
+    if not args.no_fix_stacks:
+        fixStackTraces(inputFile, isZipped, opener)
+
+    if args.clamp_contents:
+        clampBlockList(args, inputFile, isZipped, opener)
+
+    with opener(inputFile, "rb") as f:
+        j = json.load(f)
+
+    if j["version"] != outputVersion:
+        raise Exception("'version' property isn't '{:d}'".format(outputVersion))
+
+    # Extract the main parts of the JSON object.
+    invocation = j["invocation"]
+    dmdEnvVar = invocation["dmdEnvVar"]
+    mode = invocation["mode"]
+    blockList = j["blockList"]
+    traceTable = j["traceTable"]
+    frameTable = j["frameTable"]
+
+    # Insert the necessary entries for unrecorded stack traces. Note that 'ut'
+    # and 'uf' will not overlap with any keys produced by DMD's
+    # ToIdStringConverter::Base32() function.
+    unrecordedTraceID = "ut"
+    unrecordedFrameID = "uf"
+    traceTable[unrecordedTraceID] = [unrecordedFrameID]
+    frameTable[
+        unrecordedFrameID
+    ] = "#00: (no stack trace recorded due to --stacks=partial)"
+
+    # For the purposes of this script, 'scan' behaves like 'live'.
+    if mode == "scan":
+        mode = "live"
+
+    if mode not in ["live", "dark-matter", "cumulative"]:
+        raise Exception("bad 'mode' property: '{:s}'".format(mode))
+
+    # Remove allocation functions at the start of traces.
+    if args.ignore_alloc_fns:
+        # Build a regexp that matches every function in allocatorFns.
+        escapedAllocatorFns = map(re.escape, allocatorFns)
+        fn_re = re.compile("|".join(escapedAllocatorFns))
+
+        # Remove allocator fns from each stack trace. Assigning to an existing
+        # key doesn't change the dict's size, so this is safe to do while
+        # iterating over it.
+        for traceKey, frameKeys in traceTable.items():
+            numSkippedFrames = 0
+            for frameKey in frameKeys:
+                frameDesc = frameTable[frameKey]
+                if fn_re.search(frameDesc):
+                    numSkippedFrames += 1
+                else:
+                    break
+            if numSkippedFrames > 0:
+                traceTable[traceKey] = frameKeys[numSkippedFrames:]
+
+    # Trim the number of frames.
+    for traceKey, frameKeys in traceTable.items():
+        if len(frameKeys) > args.max_frames:
+            del frameKeys[args.max_frames :]
+
+    def buildTraceDescription(traceTable, frameTable, traceKey):
+        frameKeys = traceTable[traceKey]
+        fmt = "    #{:02d}{:}"
+
+        if args.filter_stacks_for_testing:
+            # This option is used by `test_dmd.js`, which runs the code in
+            # `SmokeDMD.cpp`. When running that test, there is too much
+            # variation in the stack traces across different machines and
+            # platforms to do exact output matching. However, every stack trace
+            # should have at least three frames that contain `DMD` (in one of
+            # `DMD.cpp`, `SmokeDMD.cpp`, `SmokeDMD`, or `SmokeDMD.exe`). Some
+            # example frames from automation (where `..` indicates excised path
+            # segments):
+            #
+            # Linux debug, with stack fixing using breakpad syms:
+            # `#01: replace_realloc(void*, unsigned long) [../dmd/DMD.cpp:1110]`
+            #
+            # Linux opt, with native stack fixing:
+            # `#02: TestFull(char const*, int, char const*, int) (../dmd/test/SmokeDMD.cpp:165)`
+            #
+            # Mac opt, with native stack fixing:
+            # `#03: RunTests() (../build/tests/bin/SmokeDMD + 0x21f9)`
+            #
+            # Windows opt, with native stack fixing failing due to a missing PDB:
+            # `#04: ??? (..\\build\\tests\\bin\\SmokeDMD.exe + 0x1c58)`
+            #
+            # If we see three such frames, we replace the entire stack trace
+            # with a single, predictable frame. This imprecise matching will at
+            # least detect if stack fixing fails completely.
+            dmd_frame_matches = 0
+            for frameKey in frameKeys:
+                frameDesc = frameTable[frameKey]
+                if "DMD" in frameDesc:
+                    dmd_frame_matches += 1
+                    if dmd_frame_matches >= 3:
+                        return [fmt.format(1, ": ... DMD.cpp ...")]
+
+        # The frame number is always '#00' (see DMD.h for why), so we have to
+        # replace that with the correct frame number.
+        desc = []
+        for n, frameKey in enumerate(frameKeys, start=1):
+            desc.append(fmt.format(n, frameTable[frameKey][3:]))
+        return desc
+
+    # Aggregate blocks into records. All sufficiently similar blocks go into a
+    # single record.
+
+    if mode in ["live", "cumulative"]:
+        liveOrCumulativeRecords = collections.defaultdict(Record)
+    elif mode == "dark-matter":
+        unreportedRecords = collections.defaultdict(Record)
+        onceReportedRecords = collections.defaultdict(Record)
+        twiceReportedRecords = collections.defaultdict(Record)
+
+    heapUsableSize = 0
+    heapBlocks = 0
+
+    recordKeyPartCache = {}
+
+    # For each block we compute a |recordKey|, and all blocks with the same
+    # |recordKey| are aggregated into a single record. The |recordKey| is
+    # derived from the block's 'alloc' and 'reps' (if present) stack
+    # traces.
+    #
+    # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing
+    # traces for equality. We can't use trace keys or frame keys because
+    # they're not comparable across different DMD runs (which is relevant
+    # when doing diffs).
+    #
+    # Using frame descriptions also fits in with the stack trimming done
+    # for --max-frames, which requires that stack traces with common
+    # beginnings but different endings to be considered equivalent. E.g. if
+    # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4]
+    # and we trim the final frame of each they should be considered
+    # equivalent because the untrimmed frame descriptions (D1 and D2)
+    # match.
+    #
+    # Having said all that, during a single invocation of dmd.py on a
+    # single DMD file, for a single traceKey value the record key part will
+    # always be the same, and we might encounter it 1000s of times. So we
+    # cache prior results for speed. (This helper doesn't depend on the block
+    # being processed, so it is defined once here rather than per block.)
+    def makeRecordKeyPart(traceKey):
+        if traceKey in recordKeyPartCache:
+            return recordKeyPartCache[traceKey]
+
+        recordKeyPart = str(
+            [frameTable[frameKey] for frameKey in traceTable[traceKey]]
+        )
+        recordKeyPartCache[traceKey] = recordKeyPart
+        return recordKeyPart
+
+    for block in blockList:
+        allocatedAtTraceKey = block.get("alloc", unrecordedTraceID)
+        recordKey = makeRecordKeyPart(allocatedAtTraceKey)
+        reportedAtTraceKeys = None
+
+        if mode in ["live", "cumulative"]:
+            records = liveOrCumulativeRecords
+        elif mode == "dark-matter":
+            if "reps" in block:
+                reportedAtTraceKeys = block["reps"]
+                for reportedAtTraceKey in reportedAtTraceKeys:
+                    recordKey += makeRecordKeyPart(reportedAtTraceKey)
+                if len(reportedAtTraceKeys) == 1:
+                    records = onceReportedRecords
+                else:
+                    records = twiceReportedRecords
+            else:
+                records = unreportedRecords
+
+        record = records[recordKey]
+
+        if "req" not in block:
+            raise Exception("'req' property missing in block")
+
+        reqSize = block["req"]
+        slopSize = block.get("slop", 0)
+
+        # A block entry without a 'num' property stands for a single block.
+        num = block.get("num", 1)
+
+        usableSize = reqSize + slopSize
+        heapUsableSize += num * usableSize
+        heapBlocks += num
+
+        record.numBlocks += num
+        record.reqSize += num * reqSize
+        record.slopSize += num * slopSize
+        record.usableSize += num * usableSize
+        if record.allocatedAtDesc is None:
+            record.allocatedAtDesc = buildTraceDescription(
+                traceTable, frameTable, allocatedAtTraceKey
+            )
+
+        # Only the first reported block of a record fills in the report
+        # stacks; |reportedAtTraceKeys| is only set in dark-matter mode.
+        if reportedAtTraceKeys is not None and record.reportedAtDescs == []:
+            record.reportedAtDescs = [
+                buildTraceDescription(traceTable, frameTable, k)
+                for k in reportedAtTraceKeys
+            ]
+        record.usableSizes[usableSize] += num
+
+    # All the processed data for a single DMD file is called a "digest".
+    digest = {}
+    digest["dmdEnvVar"] = dmdEnvVar
+    digest["mode"] = mode
+    digest["heapUsableSize"] = heapUsableSize
+    digest["heapBlocks"] = heapBlocks
+    if mode in ["live", "cumulative"]:
+        digest["liveOrCumulativeRecords"] = liveOrCumulativeRecords
+    elif mode == "dark-matter":
+        digest["unreportedRecords"] = unreportedRecords
+        digest["onceReportedRecords"] = onceReportedRecords
+        digest["twiceReportedRecords"] = twiceReportedRecords
+    return digest
+
+
+def diffRecords(args, records1, records2):
+    """Return a dict of records describing the change from |records1| to
+    |records2|.
+
+    The inputs are consumed: keys present in both are removed from |records2|,
+    and the record objects are modified in place and reused in the result.
+    """
+    diffed = {}
+
+    for key, before in records1.items():
+        if key not in records2:
+            # This record is present only in records1.
+            before.negate()
+            diffed[key] = before
+            continue
+
+        # This record is present in both records1 and records2. Keep it only
+        # if something actually changed.
+        after = records2.pop(key)
+        after.subtract(before)
+        if not after.isZero(args):
+            diffed[key] = after
+
+    # Whatever is left in records2 is present only in records2.
+    diffed.update(records2)
+
+    return diffed
+
+
+def diffDigests(args, d1, d2):
+    """Return a digest describing the change from digest |d1| to digest |d2|.
+
+    The result's "dmdEnvVar" is a (d1, d2) tuple, and its heap totals and
+    records are differences. Raises Exception if the two digests have
+    different modes.
+    """
+    mode = d1["mode"]
+    if mode != d2["mode"]:
+        raise Exception("the input files have different 'mode' properties")
+
+    d3 = {
+        "dmdEnvVar": (d1["dmdEnvVar"], d2["dmdEnvVar"]),
+        "mode": mode,
+        "heapUsableSize": d2["heapUsableSize"] - d1["heapUsableSize"],
+        "heapBlocks": d2["heapBlocks"] - d1["heapBlocks"],
+    }
+
+    # Which record collections exist depends on the mode.
+    if mode in ["live", "cumulative"]:
+        recordKinds = ["liveOrCumulativeRecords"]
+    elif mode == "dark-matter":
+        recordKinds = [
+            "unreportedRecords",
+            "onceReportedRecords",
+            "twiceReportedRecords",
+        ]
+    else:
+        recordKinds = []
+
+    for recordKind in recordKinds:
+        d3[recordKind] = diffRecords(args, d1[recordKind], d2[recordKind])
+
+    return d3
+
+
+def printDigest(args, digest):
+    """Print a report for |digest|: the command line, the invocation(s), the
+    heap block records of each kind, and a summary.
+
+    |digest| is either a digest for a single file or a diff of two digests; in
+    the latter case its "dmdEnvVar" entry is a tuple. The args.output,
+    args.sort_by and args.filter options are consulted.
+    """
+    dmdEnvVar = digest["dmdEnvVar"]
+    mode = digest["mode"]
+    heapUsableSize = digest["heapUsableSize"]
+    heapBlocks = digest["heapBlocks"]
+    if mode in ["live", "cumulative"]:
+        liveOrCumulativeRecords = digest["liveOrCumulativeRecords"]
+    elif mode == "dark-matter":
+        unreportedRecords = digest["unreportedRecords"]
+        onceReportedRecords = digest["onceReportedRecords"]
+        twiceReportedRecords = digest["twiceReportedRecords"]
+
+    separator = "#" + "-" * 65 + "\n"
+
+    def number(n):
+        """Format a number with comma as a separator."""
+        return "{:,d}".format(n)
+
+    def perc(m, n):
+        """Return |m| as a percentage of |n|, or 0 if |n| is 0."""
+        return 0 if n == 0 else (100 * m / n)
+
+    def plural(n):
+        """Return the plural suffix ("s" or "") to use for a count of |n|."""
+        return "" if n == 1 else "s"
+
+    # Prints to stdout, or to file if -o/--output was specified.
+    def out(*arguments, **kwargs):
+        print(*arguments, file=args.output, **kwargs)
+
+    def printStack(traceDesc):
+        # |traceDesc| is a list of already-formatted frame lines.
+        for frameDesc in traceDesc:
+            out(frameDesc)
+
+    def printRecords(recordKind, records, heapUsableSize):
+        """Print the records of one kind, sorted and filtered as requested on
+        the command line.
+
+        Returns a (usable size, block count) tuple totalled over the records
+        that pass the --filter options.
+        """
+        RecordKind = recordKind.capitalize()
+        out(separator)
+        # Note that this is the number of records *before* any --filter option
+        # is applied. It is what the "record N of M" lines and the "no ... heap
+        # blocks" message below are based on.
+        numRecords = len(records)
+        cmpRecords = sortByChoices[args.sort_by]
+        sortedRecords = sorted(
+            records.values(), key=cmp_to_key(cmpRecords), reverse=True
+        )
+        kindBlocks = 0
+        kindUsableSize = 0
+        maxRecord = 1000
+
+        # Only the allocation stack is searched; report stacks are not.
+        def is_match(rec: Record, key: str):
+            return any(key in desc for desc in rec.allocatedAtDesc)
+
+        # Apply each filter in turn, so a record must satisfy all of them.
+        for arg in args.filter:
+            key: str
+            # NOTE(review): |cond| is called below with two arguments, a
+            # record and the key, so this annotation looks like it should be
+            # Callable[[Record, str], bool].
+            cond: Callable[[Record], bool]
+            if arg.startswith("\\"):
+                # just in case you really need to start a filter with '!' (or '\')
+                key = arg[1:]
+                cond = is_match
+            elif arg.startswith("!"):
+                key = arg[1:]
+
+                def cond(rec, key):
+                    return not is_match(rec, key)  # noqa: E731
+
+            else:
+                key = arg
+                cond = is_match
+            sortedRecords = [rec for rec in sortedRecords if cond(rec, key)]
+
+        # First iteration: get totals, etc.
+        for record in sortedRecords:
+            kindBlocks += record.numBlocks
+            kindUsableSize += record.usableSize
+
+        # Second iteration: print.
+        if numRecords == 0:
+            out("# no {:} heap blocks\n".format(recordKind))
+
+        kindCumulativeUsableSize = 0
+        for i, record in enumerate(sortedRecords, start=1):
+            # Stop printing at the |maxRecord|th record.
+            # The check comes before the record is printed, so the
+            # |maxRecord|th record itself is not shown.
+            if i == maxRecord:
+                out(
+                    "# {:}: stopping after {:,d} heap block records\n".format(
+                        RecordKind, i
+                    )
+                )
+                break
+
+            kindCumulativeUsableSize += record.usableSize
+
+            out(RecordKind + " {")
+            out(
+                "  {:} block{:} in heap block record {:,d} of {:,d}".format(
+                    number(record.numBlocks), plural(record.numBlocks), i, numRecords
+                )
+            )
+            out(
+                "  {:} bytes ({:} requested / {:} slop)".format(
+                    number(record.usableSize),
+                    number(record.reqSize),
+                    number(record.slopSize),
+                )
+            )
+
+            # Largest blocks first; the sizes can be negative in a diff, hence
+            # the abs().
+            usableSizes = sorted(
+                record.usableSizes.items(), key=lambda x: abs(x[0]), reverse=True
+            )
+            hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1
+
+            # The size breakdown is omitted for a record with just one block.
+            if not hasSingleBlock:
+                out("  Individual block sizes: ", end="")
+                if len(usableSizes) == 0:
+                    out("(no change)", end="")
+                else:
+                    isFirst = True
+                    for usableSize, count in usableSizes:
+                        if not isFirst:
+                            out("; ", end="")
+                        out("{:}".format(number(usableSize)), end="")
+                        if count > 1:
+                            out(" x {:,d}".format(count), end="")
+                        isFirst = False
+                out()
+
+            out(
+                "  {:4.2f}% of the heap ({:4.2f}% cumulative)".format(
+                    perc(record.usableSize, heapUsableSize),
+                    perc(kindCumulativeUsableSize, heapUsableSize),
+                )
+            )
+            if mode in ["live", "cumulative"]:
+                pass
+            elif mode == "dark-matter":
+                out(
+                    "  {:4.2f}% of {:} ({:4.2f}% cumulative)".format(
+                        perc(record.usableSize, kindUsableSize),
+                        recordKind,
+                        perc(kindCumulativeUsableSize, kindUsableSize),
+                    )
+                )
+            out("  Allocated at {")
+            printStack(record.allocatedAtDesc)
+            out("  }")
+            if mode in ["live", "cumulative"]:
+                pass
+            elif mode == "dark-matter":
+                for n, reportedAtDesc in enumerate(record.reportedAtDescs):
+                    again = "again " if n > 0 else ""
+                    out("  Reported {:}at {{".format(again))
+                    printStack(reportedAtDesc)
+                    out("  }")
+            out("}\n")
+
+        return (kindUsableSize, kindBlocks)
+
+    def printInvocation(n, dmdEnvVar, mode):
+        # |n| is a suffix for the "Invocation" heading; it is "" for a single
+        # file and " 1" or " 2" for a diff.
+        out("Invocation{:} {{".format(n))
+        if dmdEnvVar is None:
+            out("  $DMD is undefined")
+        else:
+            out("  $DMD = '" + dmdEnvVar + "'")
+        out("  Mode = '" + mode + "'")
+        out("}\n")
+
+    # Print command line. Strip dirs so the output is deterministic, which is
+    # needed for testing.
+    out(separator, end="")
+    out("# " + " ".join(map(os.path.basename, sys.argv)) + "\n")
+
+    # Print invocation(s).
+    # diffDigests() stores a tuple holding both files' values here.
+    if type(dmdEnvVar) is not tuple:
+        printInvocation("", dmdEnvVar, mode)
+    else:
+        printInvocation(" 1", dmdEnvVar[0], mode)
+        printInvocation(" 2", dmdEnvVar[1], mode)
+
+    # Print records.
+    if mode in ["live", "cumulative"]:
+        liveOrCumulativeUsableSize, liveOrCumulativeBlocks = printRecords(
+            mode, liveOrCumulativeRecords, heapUsableSize
+        )
+    elif mode == "dark-matter":
+        # The twice-reported records are printed first, then the unreported
+        # ones, then the once-reported ones.
+        twiceReportedUsableSize, twiceReportedBlocks = printRecords(
+            "twice-reported", twiceReportedRecords, heapUsableSize
+        )
+
+        unreportedUsableSize, unreportedBlocks = printRecords(
+            "unreported", unreportedRecords, heapUsableSize
+        )
+
+        onceReportedUsableSize, onceReportedBlocks = printRecords(
+            "once-reported", onceReportedRecords, heapUsableSize
+        )
+
+    # Print summary.
+    out(separator)
+    out("Summary {")
+    if mode in ["live", "cumulative"]:
+        out(
+            "  Total: {:} bytes in {:} blocks".format(
+                number(liveOrCumulativeUsableSize), number(liveOrCumulativeBlocks)
+            )
+        )
+    elif mode == "dark-matter":
+        fmt = "  {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)"
+        out(fmt.format("Total:", number(heapUsableSize), 100, number(heapBlocks), 100))
+        out(
+            fmt.format(
+                "Unreported:",
+                number(unreportedUsableSize),
+                perc(unreportedUsableSize, heapUsableSize),
+                number(unreportedBlocks),
+                perc(unreportedBlocks, heapBlocks),
+            )
+        )
+        out(
+            fmt.format(
+                "Once-reported:",
+                number(onceReportedUsableSize),
+                perc(onceReportedUsableSize, heapUsableSize),
+                number(onceReportedBlocks),
+                perc(onceReportedBlocks, heapBlocks),
+            )
+        )
+        out(
+            fmt.format(
+                "Twice-reported:",
+                number(twiceReportedUsableSize),
+                perc(twiceReportedUsableSize, heapUsableSize),
+                number(twiceReportedBlocks),
+                perc(twiceReportedBlocks, heapBlocks),
+            )
+        )
+    out("}\n")
+
+
+#############################
+# Pretty printer for DMD JSON
+#############################
+
+
+def prettyPrintDmdJson(out, j):
+    """Write the DMD JSON object |j| to the text stream |out|.
+
+    The output has one block, trace table entry or frame table entry per
+    line. The "version", "invocation", "blockList", "traceTable" and
+    "frameTable" properties of |j| are written, in that order.
+    """
+
+    def writeTable(name, table, terminator):
+        # Write a JSON object with one entry per line. The keys go through
+        # json.dumps() as well as the values, so that a key containing a quote
+        # or a backslash is escaped and the output remains valid JSON.
+        out.write(' "{0}": {{'.format(name))
+        first = True
+        for key, value in table.items():
+            out.write("" if first else ",")
+            out.write("\n  {0}: {1}".format(json.dumps(key), json.dumps(value)))
+            first = False
+        out.write(terminator)
+
+    out.write("{\n")
+
+    out.write(' "version": {0},\n'.format(json.dumps(j["version"])))
+    out.write(' "invocation": ')
+    json.dump(j["invocation"], out, sort_keys=True)
+    out.write(",\n")
+
+    out.write(' "blockList": [')
+    first = True
+    for b in j["blockList"]:
+        out.write("" if first else ",")
+        out.write("\n  ")
+        json.dump(b, out, sort_keys=True)
+        first = False
+    out.write("\n ],\n")
+
+    writeTable("traceTable", j["traceTable"], "\n },\n")
+    writeTable("frameTable", j["frameTable"], "\n }\n")
+
+    out.write("}\n")
+
+
+##################################################################
+# Code for clamping addresses using conservative pointer analysis.
+##################################################################
+
+# Start is the address of the first byte of the block, while end is
+# the address of the first byte after the final byte in the block.
+class AddrRange:
+    """The address range occupied by a single block.
+
+    |block| is the block's address as a hexadecimal string and |length| is
+    its size. The numeric start address must be positive and the length
+    non-negative.
+    """
+
+    def __init__(self, block, length):
+        self.block = block
+        startAddr = int(block, 16)
+        self.start = startAddr
+        self.length = length
+        self.end = startAddr + length
+
+        assert startAddr > 0
+        assert length >= 0
+
+
+class ClampStats:
+    """Counters recording how each scanned pointer was classified."""
+
+    def __init__(self):
+        # Pointers that already pointed to the start of a block.
+        self.startBlockPtr = 0
+
+        # Pointers into the middle of a block; these get clamped to the
+        # start of the block they point into.
+        self.midBlockPtr = 0
+
+        # Null pointers.
+        self.nullPtr = 0
+
+        # Non-null pointers that didn't point into any block; these get
+        # clamped to null.
+        self.nonNullNonBlockPtr = 0
+
+    def clampedBlockAddr(self, sameAddress):
+        """Count a pointer into a block; `sameAddress` means it hit the start."""
+        if not sameAddress:
+            self.midBlockPtr += 1
+            return
+        self.startBlockPtr += 1
+
+    def nullAddr(self):
+        """Count a null pointer."""
+        self.nullPtr += 1
+
+    def clampedNonBlockAddr(self):
+        """Count a non-null pointer that was clamped to null."""
+        self.nonNullNonBlockPtr += 1
+
+    def log(self):
+        """Write the four counters to stderr, one per line."""
+        write = sys.stderr.write
+        report = (
+            (
+                "  Number of pointers already pointing to start of blocks: ",
+                self.startBlockPtr,
+            ),
+            (
+                "  Number of pointers clamped to start of blocks: ",
+                self.midBlockPtr,
+            ),
+            (
+                "  Number of non-null pointers not pointing into blocks "
+                "clamped to null: ",
+                self.nonNullNonBlockPtr,
+            ),
+            ("  Number of null pointers: ", self.nullPtr),
+        )
+
+        write("Results:\n")
+        for label, count in report:
+            write(label + str(count) + "\n")
+
+
+# Search the block ranges array for the block that the address points into.
+# The search is carried out in an array of the blocks' starting addresses
+# because it is faster.
+def clampAddress(blockRanges, blockStarts, clampStats, address):
+    """Clamp `address` to the start of the block containing it, if any.
+
+    blockRanges: ranges sorted by `start`; each has `start`, `end` and
+        `block` attributes.
+    blockStarts: the `start` of every entry of `blockRanges`, in the same
+        order.
+    clampStats: receives one `clampedBlockAddr` or `clampedNonBlockAddr`
+        call describing the outcome.
+    address: the integer address to classify; it must not be lower than
+        the first block's start.
+
+    Returns the `block` string of the containing range, or "0" if the
+    address lies outside every block.
+    """
+    i = bisect_right(blockStarts, address)
+
+    # Any addresses completely out of the range should have been eliminated already.
+    assert i > 0
+    r = blockRanges[i - 1]
+    assert r.start <= address
+
+    if address >= r.end:
+        # The address is past the end of `r`, so it must fall short of the
+        # next block. When `r` is the last block there is no next block, and
+        # indexing blockRanges[i] would raise IndexError instead of
+        # classifying the address.
+        assert i == len(blockRanges) or address < blockRanges[i].start
+        clampStats.clampedNonBlockAddr()
+        return "0"
+
+    clampStats.clampedBlockAddr(r.start == address)
+    return r.block
+
+
+def clampBlockList(args, inputFileName, isZipped, opener):
+    """Clamp the pointers stored in block contents of a DMD log, in place.
+
+    Every non-null word in a block's "contents" is replaced by the address
+    of the block it points into, or by "0" if it points into no block.
+    Trailing nulls are then dropped from each "contents" list and the file
+    is rewritten with prettyPrintDmdJson.
+
+    args: parsed command line; only `print_clamp_stats` is read here.
+    inputFileName: path of the log to read and then overwrite.
+    isZipped: if true, the rewritten file is gzip-compressed.
+    opener: called as `opener(inputFileName, "rb")` to open the log.
+
+    Raises Exception if the log's "version" isn't `outputVersion` or if it
+    wasn't taken in "scan" mode.
+    """
+    # XXX This isn't very efficient because we end up reading and writing
+    # the file multiple times.
+    with opener(inputFileName, "rb") as f:
+        j = json.load(f)
+
+    if j["version"] != outputVersion:
+        raise Exception("'version' property isn't '{:d}'".format(outputVersion))
+
+    # Check that the invocation is reasonable for contents clamping.
+    invocation = j["invocation"]
+    if invocation["mode"] != "scan":
+        raise Exception("Log was taken in mode " + invocation["mode"] + " not scan")
+
+    sys.stderr.write("Creating block range list.\n")
+    blockList = j["blockList"]
+    blockRanges = sorted(
+        (AddrRange(block["addr"], block["req"]) for block in blockList),
+        key=lambda r: r.start,
+    )
+
+    # Make sure there are no overlapping blocks.
+    for prevRange, currRange in zip(blockRanges, blockRanges[1:]):
+        assert prevRange.end <= currRange.start
+
+    sys.stderr.write("Clamping block contents.\n")
+    clampStats = ClampStats()
+
+    # With an empty block list there is nothing to clamp and these bounds
+    # are never consulted; the guards only avoid indexing an empty list.
+    firstAddr = blockRanges[0].start if blockRanges else 0
+    lastAddr = blockRanges[-1].end if blockRanges else 0
+
+    blockStarts = [r.start for r in blockRanges]
+
+    for block in blockList:
+        # Small blocks don't have any contents.
+        if "contents" not in block:
+            continue
+
+        cont = block["contents"]
+        for i, word in enumerate(cont):
+            address = int(word, 16)
+
+            if address == 0:
+                clampStats.nullAddr()
+                continue
+
+            # If the address is before the first block or after the last
+            # block then it can't be within a block.
+            if address < firstAddr or address >= lastAddr:
+                clampStats.clampedNonBlockAddr()
+                cont[i] = "0"
+                continue
+
+            cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address)
+
+        # Remove any trailing nulls.
+        while cont and cont[-1] == "0":
+            cont.pop()
+
+    if args.print_clamp_stats:
+        clampStats.log()
+
+    sys.stderr.write("Saving file.\n")
+    tmpFilename = None
+    try:
+        with tempfile.NamedTemporaryFile(delete=False) as tmpFile:
+            tmpFilename = tmpFile.name
+            if isZipped:
+                binaryOut = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb")
+            else:
+                binaryOut = tmpFile
+
+            textOut = io.TextIOWrapper(binaryOut, encoding="utf-8")
+            prettyPrintDmdJson(textOut, j)
+            # Push the encoded text down to binaryOut, then detach so the
+            # wrapper doesn't close binaryOut behind our back when it is
+            # garbage collected.
+            textOut.flush()
+            textOut.detach()
+
+            if isZipped:
+                # Writes the gzip trailer. This doesn't close tmpFile, which
+                # the enclosing `with` flushes and closes.
+                binaryOut.close()
+
+        # The temporary file is fully written and closed by now.
+        shutil.move(tmpFilename, inputFileName)
+    except BaseException:
+        # Don't leave a stray temporary file behind if anything went wrong.
+        if tmpFilename is not None and os.path.exists(tmpFilename):
+            os.remove(tmpFilename)
+        raise
+
+
+def main():
+    """Compute a digest from the input file(s) named on the command line and print it.
+
+    If a second input file was given, the digest printed is the result of
+    diffDigests applied to the digests of the two files.
+    """
+    args = parseCommandLine()
+    result = getDigestFromFile(args, args.input_file)
+
+    secondInput = args.input_file2
+    if secondInput:
+        result = diffDigests(args, result, getDigestFromFile(args, secondInput))
+
+    printDigest(args, result)
+
+
+# Run main() only when this file is executed as a script, not when it is
+# imported as a module.
+if __name__ == "__main__":
+    main()
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
commit	6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree	a68f146d7fa01f0134297619fbe7e33db084e0aa /memory/replace/dmd/dmd.py
parent	Initial commit. (diff)
download	thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip