memory/replace/dmd/block_analyzer.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293

#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# From a scan mode DMD log, extract some information about a
# particular block, such as its allocation stack or which other blocks
# contain pointers to it. This can be useful when investigating leaks
# caused by unknown references to refcounted objects.

import argparse
import gzip
import json
import re
import sys

# The DMD output version this script handles. loadGraph() rejects any log
# whose "version" property differs from this value.
outputVersion = 5

# If --ignore-alloc-fns is specified, stack frames containing functions that
# match these strings will be removed from the *start* of stack traces. (Once
# we hit a non-matching frame, any subsequent frames won't be removed even if
# they do match.) Each entry is matched as a literal substring of the frame
# description: cleanupTraceTable() regex-escapes the entries before joining
# them into a single pattern.
allocatorFns = [
    "malloc (",
    "replace_malloc",
    "replace_calloc",
    "replace_realloc",
    "replace_memalign",
    "replace_posix_memalign",
    "malloc_zone_malloc",
    "moz_xmalloc",
    "moz_xcalloc",
    "moz_xrealloc",
    "operator new(",
    "operator new[](",
    "g_malloc",
    "g_slice_alloc",
    "callocCanGC",
    "reallocCanGC",
    "vpx_malloc",
    "vpx_calloc",
    "vpx_realloc",
    "vpx_memalign",
    "js_malloc",
    "js_calloc",
    "js_realloc",
    "pod_malloc",
    "pod_calloc",
    "pod_realloc",
    "nsTArrayInfallibleAllocator::Malloc",
    "Allocator<ReplaceMallocBase>::malloc(",
    "mozilla::dmd::StackTrace::Get(",
    "mozilla::dmd::AllocCallback(",
    "mozilla::dom::DOMArena::Allocate(",
    # This one is necessary to fully filter some sequences of allocation
    # functions that happen in practice. Note that ??? entries that follow
    # non-allocation functions won't be stripped, as explained above.
    "???",
]

####

# Command line arguments


def range_1_24(string):
    """argparse ``type`` callback: parse *string* as an integer in 1..24.

    Returns the parsed integer. Raises argparse.ArgumentTypeError when the
    value lies outside the inclusive range 1..24. A string that is not an
    integer at all propagates the ValueError raised by int().
    """
    value = int(string)
    if 1 <= value <= 24:
        return value
    raise argparse.ArgumentTypeError(
        "{:s} is not in the range 1..24".format(string)
    )


# Command-line interface: two positionals (the log file and the address of
# the block of interest) followed by the optional switches, registered in the
# order they are listed by --help.
parser = argparse.ArgumentParser(
    description=(
        "Analyze the heap graph to find out things about an object. "
        "By default this prints out information about blocks that point to "
        "the given block."
    )
)

parser.add_argument("dmd_log_file_name", help="clamped DMD log file name")
parser.add_argument("block", help="address of the block of interest")

# store_true already implies dest="info" and default=False.
parser.add_argument(
    "--info", action="store_true", help="Print out information about the block."
)

parser.add_argument(
    "-sfl",
    "--max-stack-frame-length",
    default=300,
    type=int,
    help="Maximum number of characters to print from each stack frame",
)

parser.add_argument(
    "-a",
    "--ignore-alloc-fns",
    help="ignore allocation functions at the start of traces",
    action="store_true",
)

parser.add_argument(
    "-f",
    "--max-frames",
    default=8,
    type=range_1_24,
    help="maximum number of frames to consider in each trace",
)

parser.add_argument(
    "-c",
    "--chain-reports",
    help="if only one block is found to hold onto the object, report the next one, too",
    action="store_true",
)


####


class BlockData:
    """In-memory view of one entry of the log's "blockList".

    Attributes set by the constructor:
      addr        -- the entry's "addr" value, stored unchanged.
      contents    -- the entry's "contents" strings parsed as base-16
                     integers; an empty list when the key is absent.
      req_size    -- the entry's "req" value.
      alloc_stack -- the entry's "alloc" value (used elsewhere in this file
                     as a key into the trace table).
    """

    def __init__(self, json_block):
        self.addr = json_block["addr"]

        # "contents" is optional; every recorded word is a hex string.
        raw_words = json_block.get("contents", [])
        self.contents = [int(word, 16) for word in raw_words]

        self.req_size = json_block["req"]
        self.alloc_stack = json_block["alloc"]


def print_trace_segment(args, stacks, block):
    """Print the allocation stack of *block*, one indented frame per line.

    args   -- parsed options; only max_stack_frame_length is read.
    stacks -- (traceTable, frameTable) pair.
    block  -- object whose alloc_stack attribute is a key of traceTable.
    """
    trace_table, frame_table = stacks

    for frame_key in trace_table[block.alloc_stack]:
        frame = frame_table[frame_key]
        # Start at index 5 to drop the bogus leading "#00: " from the stack
        # frame; the upper bound is an absolute index into the frame string.
        print("  " + frame[5 : args.max_stack_frame_length])


def show_referrers(args, blocks, stacks, block):
    """Print every block whose contents include a pointer to *block*.

    args   -- parsed options; chain_reports is read here and the object is
              forwarded to print_trace_segment().
    blocks -- dict mapping integer block address -> block data (objects with
              addr, req_size, contents and alloc_stack attributes).
    stacks -- (traceTable, frameTable) pair, forwarded to
              print_trace_segment().
    block  -- integer address of the block of interest.

    For each referrer, its address, requested size, the byte offsets of the
    referring words and its allocation stack are printed. With
    args.chain_reports set, as long as exactly one referrer is found the
    search is repeated for that referrer, until there are no referrers, more
    than one referrer, or the chain returns to a block already examined.
    """
    # Seed the set with the starting block so that a cycle leading back to it
    # (including a block that points at itself) is reported as soon as it
    # closes, instead of one hop later after printing a referrer twice.
    visited = {block}

    any_found = False

    while True:
        # referrer address -> byte offsets of the words equal to `block`.
        referrers = {}

        for addr, data in blocks.items():
            for index, word in enumerate(data.contents):
                if word == block:
                    # 8 is the number of bytes per word on a 64-bit system.
                    # XXX This means that this output will be wrong for logs
                    # from 32-bit systems!
                    referrers.setdefault(addr, []).append(8 * index)

        if referrers:
            any_found = True

        for addr, offsets in referrers.items():
            referrer = blocks[addr]
            sys.stdout.write(
                "0x{} size = {} bytes".format(referrer.addr, referrer.req_size)
            )
            plural = "s" if len(offsets) > 1 else ""
            sys.stdout.write(
                " at byte offset" + plural + " " + ", ".join(str(x) for x in offsets)
            )
            print()
            print_trace_segment(args, stacks, referrer)
            print()

        if not args.chain_reports:
            break

        if not referrers:
            sys.stdout.write("Found no more referrers.\n")
            break
        if len(referrers) > 1:
            sys.stdout.write("Found too many referrers.\n")
            break

        sys.stdout.write("Chaining to next referrer.\n\n")
        # Exactly one referrer remains at this point; follow it.
        block = next(iter(referrers))
        if block in visited:
            sys.stdout.write("Found a loop.\n")
            break
        visited.add(block)

    if not any_found:
        print("No referrers found.")


def show_block_info(args, blocks, stacks, block):
    """Print the address, requested size, contents and allocation stack of a block.

    args   -- parsed options, forwarded to print_trace_segment().
    blocks -- dict mapping integer block address -> block data (objects with
              addr, req_size, contents and alloc_stack attributes).
    stacks -- (traceTable, frameTable) pair, forwarded to
              print_trace_segment().
    block  -- integer address of the block to describe; must be a key of
              *blocks* (KeyError otherwise).
    """
    info = blocks[block]
    sys.stdout.write("block: 0x{}\n".format(info.addr))
    sys.stdout.write("requested size: {} bytes\n".format(info.req_size))
    sys.stdout.write("\n")
    sys.stdout.write("block contents: ")
    for word in info.contents:
        if word == 0:
            text = "0"
        elif word in blocks:
            text = blocks[word].addr
        else:
            # The word is not the start address of any known block
            # (presumably only possible when the log was not clamped).
            # Print the raw value instead of failing with a KeyError.
            text = "{:x}".format(word)
        sys.stdout.write("0x{} ".format(text))
    sys.stdout.write("\n\n")
    sys.stdout.write("allocation stack:\n")
    print_trace_segment(args, stacks, info)


def cleanupTraceTable(args, frameTable, traceTable):
    """Shorten every trace in *traceTable* in place.

    args       -- parsed options; ignore_alloc_fns and max_frames are read.
    frameTable -- mapping from frame key to frame description string.
    traceTable -- mapping from trace key to a list of frame keys; its values
                  are replaced by shortened lists where needed.

    When args.ignore_alloc_fns is set, leading frames whose description
    matches any entry of allocatorFns are dropped first. Every trace is then
    cut down to at most args.max_frames frames.
    """
    if args.ignore_alloc_fns:
        # One alternation that matches any allocator function name literally.
        alloc_fn_re = re.compile("|".join(re.escape(fn) for fn in allocatorFns))

        for trace_key, frame_keys in traceTable.items():
            # Count the leading run of allocator frames; stop at the first
            # frame that does not match.
            leading = 0
            for frame_key in frame_keys:
                if not alloc_fn_re.search(frameTable[frame_key]):
                    break
                leading += 1
            if leading:
                traceTable[trace_key] = frame_keys[leading:]

    # Cap the number of frames kept per trace.
    for trace_key, frame_keys in traceTable.items():
        if len(frame_keys) > args.max_frames:
            traceTable[trace_key] = frame_keys[: args.max_frames]


def loadGraph(options):
    """Load the DMD log named by options.dmd_log_file_name.

    A file name ending in ".gz" is read through gzip; anything else is opened
    directly. Raises Exception when the log's "version" property differs from
    outputVersion.

    Returns (blocks, (traceTable, frameTable)), where *blocks* maps each
    block's address (its "addr" parsed as a base-16 integer) to a BlockData
    and the trace table has already been passed through cleanupTraceTable().
    """
    path = options.dmd_log_file_name
    # Handle gzipped input if necessary.
    open_log = gzip.open if path.endswith(".gz") else open

    with open_log(path, "rb") as log_file:
        log = json.load(log_file)

    if log["version"] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    blocks = {}
    for entry in log["blockList"]:
        data = BlockData(entry)
        blocks[int(entry["addr"], 16)] = data

    trace_table = log["traceTable"]
    frame_table = log["frameTable"]
    cleanupTraceTable(options, frame_table, trace_table)

    return (blocks, (trace_table, frame_table))


def analyzeLogs():
    """Script entry point: parse the command line, load the log and report.

    Prints a notice and returns when the requested block address is not among
    the loaded blocks. Otherwise prints the block's own information when
    --info was given, or the blocks referring to it by default.
    """
    options = parser.parse_args()
    blocks, stacks = loadGraph(options)

    # The block address is given on the command line in hexadecimal.
    block = int(options.block, 16)

    if block not in blocks:
        print("Object " + options.block + " not found in traces.")
        print("It could still be the target of some nodes.")
        return

    report = show_block_info if options.info else show_referrers
    report(options, blocks, stacks, block)


# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    analyzeLogs()