js/src/devtools/rootAnalysis/loadCallgraph.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */

"use strict";

loadRelativeToScript('utility.js');

// Functions come out of sixgill in the form "mangled$readable". The mangled
// name is Truth. One mangled name might correspond to multiple readable names,
// for multiple reasons, including (1) sixgill/gcc doesn't always qualify types
// the same way or de-typedef the same amount; (2) sixgill's output treats
// references and pointers the same, and so doesn't distinguish them, but C++
// treats them as separate for overloading and linking; (3) (identical)
// destructors sometimes have an int32 parameter, sometimes not.
//
// The readable names are useful because they're far more meaningful to the
// user, and are what should show up in reports and questions to mrgiggles. At
// least in most cases, it's fine to have the extra mangled name tacked onto
// the beginning for these.
//
// The strategy used is to separate out the pieces whenever they are read in,
// create a table mapping mangled names to all readable names, and use the
// mangled names in all computation -- except for limited circumstances when
// the readable name is used in annotations.
//
// Note that callgraph.txt uses a compressed representation -- each name is
// mapped to an integer, and those integers are what is recorded in the edges.
// But the integers depend on the full name, whereas the true edge should only
// consider the mangled name. And some of the names encoded in callgraph.txt
// are FieldCalls, not just function names.

var readableNames = {}; // map from mangled name => list of readable names
var calleesOf = {}; // map from mangled => list of tuples of {'callee':mangled, 'limits':intset}
var callersOf; // map from mangled => list of tuples of {'caller':mangled, 'limits':intset}
var gcFunctions = {}; // map from mangled callee => reason
var limitedFunctions = {}; // set of mangled names (map from mangled name => limit intset)
var gcEdges = {};

// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
var functionNames = [""];

var mangledToId = {};

// Returns whether the function was added. (It will be refused if it was
// already there, or if limits or annotations say it shouldn't be added.)
function addGCFunction(caller, reason, functionLimits)
{
    if (functionLimits[caller] & LIMIT_CANNOT_GC)
        return false;

    if (ignoreGCFunction(functionNames[caller]))
        return false;

    if (!(caller in gcFunctions)) {
        gcFunctions[caller] = reason;
        return true;
    }

    return false;
}

// Every caller->callee callsite is associated with a limit saying what is
// allowed at that callsite (eg if it's in a GC suppression zone, it would have
// LIMIT_CANNOT_GC set.) A given caller might call the same callee multiple
// times, with different limits, so we want to associate the <caller,callee>
// edge with the intersection ('AND') of all of the callsites' limits.
//
// Scan through all call edges and intersect the limits for all matching
// <caller,callee> edges (so that the result is the least limiting of all
// matching edges.) Preserve the original order.
//
// During the same scan, build callersOf from calleesOf.
function merge_repeated_calls(calleesOf) {
    const callersOf = Object.create(null);

    for (const [caller, callee_limits] of Object.entries(calleesOf)) {
        const ordered_callees = [];

        // callee_limits is a list of {callee,limit} objects.
        const callee2limit = new Map();
        for (const {callee, limits} of callee_limits) {
            const prev_limits = callee2limit.get(callee);
            if (prev_limits === undefined) {
                callee2limit.set(callee, limits);
                ordered_callees.push(callee);
            } else {
                callee2limit.set(callee, prev_limits & limits);
            }
        }

        // Update the contents of callee_limits to contain a single entry for
        // each callee, with its limits set to the AND of the limits observed
        // at all callsites within this caller function.
        callee_limits.length = 0;
        for (const callee of ordered_callees) {
            const limits = callee2limit.get(callee);
            callee_limits.push({callee, limits});
            if (!(callee in callersOf))
                callersOf[callee] = [];
            callersOf[callee].push({caller, limits});
        }
    }

    return callersOf;
}

function loadCallgraph(file)
{
    const fieldCallLimits = {};
    const fieldCallCSU = new Map(); // map from full field name id => csu name
    const resolvedFieldCalls = new Set();

    // set of mangled names (map from mangled name => limit intset)
    var functionLimits = {};

    let numGCCalls = 0;

    for (let line of readFileLines_gen(file)) {
        line = line.replace(/\n/, "");

        let match;
        if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
            const [ _, id, mangled ] = match;
            assert(functionNames.length == id);
            functionNames.push(mangled);
            mangledToId[mangled] = id;
            continue;
        }
        if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
            const [ _, id, readable ] = match;
            const mangled = functionNames[id];
            if (mangled in readableNames)
                readableNames[mangled].push(readable);
            else
                readableNames[mangled] = [ readable ];
            continue;
        }

        let limits = 0;
        // Example line: D /17 6 7
        //
        // This means a direct call from 6 -> 7, but within a scope that
        // applies limits 0x1 and 0x10 to the callee.
        //
        // Look for a limit and remove it from the line if found.
        if (line.indexOf("/") != -1) {
            match = /^(..)\/(\d+) (.*)/.exec(line);
            line = match[1] + match[3];
            limits = match[2]|0;
        }
        const tag = line.charAt(0);
        if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) {
            const caller = match[1]|0;
            const name = match[2];
            if (!indirectCallCannotGC(functionNames[caller], name) &&
                !(limits & LIMIT_CANNOT_GC))
            {
                addGCFunction(caller, "IndirectCall: " + name, functionLimits);
            }
        } else if (match = (tag == 'F' || tag == 'V') && /^[FV] (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
            const caller = match[1]|0;
            const fullfield = match[2]|0;
            const csu = match[3];
            const fullfield_str = csu + "." + match[4];
            assert(functionNames[fullfield] == fullfield_str);
            if (limits)
                fieldCallLimits[fullfield] = limits;
            addToKeyedList(calleesOf, caller, {callee:fullfield, limits});
            fieldCallCSU.set(fullfield, csu);
        } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) {
            const caller = match[1]|0;
            const callee = match[2]|0;
            addToKeyedList(calleesOf, caller, {callee:callee, limits:limits});
        } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) {
            const callerField = match[1]|0;
            const callee = match[2]|0;
            // Resolved virtual functions create a dummy node for the field
            // call, and callers call it. It will then call all possible
            // instantiations. No additional limits are placed on the callees;
            // it's as if there were a function named BaseClass.foo:
            //
            //     void BaseClass.foo() {
            //         Subclass1::foo();
            //         Subclass2::foo();
            //     }
            //
            addToKeyedList(calleesOf, callerField, {callee:callee, limits:0});
            // Mark that we resolved this virtual method, so that it isn't
            // assumed to call some random function that might do anything.
            resolvedFieldCalls.add(callerField);
        } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
            const id = match[1]|0;
            let tag = match[2];
            if (tag == 'GC Call') {
                addGCFunction(id, "GC", functionLimits);
                numGCCalls++;
            }
        } else {
            assert(false, "Invalid format in callgraph line: " + line);
        }
    }

    // Callers have a list of callees, with duplicates (if the same function is
    // called more than once.) Merge the repeated calls, only keeping limits
    // that are in force for *every* callsite of that callee. Also, generate
    // the callersOf table at the same time.
    callersOf = merge_repeated_calls(calleesOf);

    // Add in any extra functions at the end. (If we did this early, it would
    // mess up the id <-> name correspondence. Also, we need to know if the
    // functions even exist in the first place.)
    for (var func of extraGCFunctions()) {
        addGCFunction(mangledToId[func], "annotation", functionLimits);
    }

    // Compute functionLimits: it should contain the set of functions that
    // are *always* called within some sort of limited context (eg GC
    // suppression).

    // Initialize to limited field calls.
    for (var [name, limits] of Object.entries(fieldCallLimits)) {
        if (limits)
            functionLimits[name] = limits;
    }

    // Initialize functionLimits to the set of all functions, where each one is
    // maximally limited, and return a worklist containing all simple roots
    // (nodes with no callers).
    var roots = gather_simple_roots(functionLimits, callersOf);

    // Traverse the graph, spreading the limits down from the roots.
    propagate_limits(roots, functionLimits, calleesOf);

    // There are a surprising number of "recursive roots", where there is a
    // cycle of functions calling each other but not called by anything else,
    // and these roots may also have descendants. Now that the above traversal
    // has eliminated everything reachable from simple roots, traverse the
    // remaining graph to gather up a representative function from each root
    // cycle.
    roots = gather_recursive_roots(roots, functionLimits, callersOf);

    // And do a final traversal starting with the recursive roots.
    propagate_limits(roots, functionLimits, calleesOf);

    // Eliminate GC-limited functions from the set of functions known to GC.
    for (var name in gcFunctions) {
        if (functionLimits[name] & LIMIT_CANNOT_GC)
            delete gcFunctions[name];
    }

    // functionLimits should now contain all functions that are always called
    // in a limited context.

    // Sanity check to make sure the callgraph has some functions annotated as
    // GC Calls. This is mostly a check to be sure the earlier processing
    // succeeded (as opposed to, say, running on empty xdb files because you
    // didn't actually compile anything interesting.)
    assert(numGCCalls > 0, "No GC functions found!");

    // Initialize the worklist to all known gcFunctions.
    var worklist = [];
    for (const name in gcFunctions)
        worklist.push(name);

    // Include all field calls and unresolved virtual method calls.
    for (const [name, csuName] of fieldCallCSU) {
        if (resolvedFieldCalls.has(name))
            continue; // Skip resolved virtual functions.
        const fullFieldName = functionNames[name];
        if (!fieldCallCannotGC(csuName, fullFieldName)) {
            gcFunctions[name] = 'unresolved ' + fullFieldName;
            worklist.push(name);
        }
    }

    // Recursively find all callers not always called in a GC suppression
    // context, and add them to the set of gcFunctions.
    while (worklist.length) {
        name = worklist.shift();
        assert(name in gcFunctions, "gcFunctions does not contain " + name);
        if (!(name in callersOf))
            continue;
        for (const {caller, limits} of callersOf[name]) {
            if (!(limits & LIMIT_CANNOT_GC)) {
                if (addGCFunction(caller, name, functionLimits))
                    worklist.push(caller);
            }
        }
    }

    // Convert functionLimits to limitedFunctions (using mangled names instead
    // of ids.)

    for (const [id, limits] of Object.entries(functionLimits))
        limitedFunctions[functionNames[id]] = limits;

    // The above code uses integer ids for efficiency. External code uses
    // mangled names. Rewrite the various data structures to convert ids to
    // mangled names.
    remap_ids_to_mangled_names();
}

// Return a worklist of functions with no callers, and also initialize
// functionLimits to the set of all functions, each mapped to LIMIT_UNVISTED.
function gather_simple_roots(functionLimits, callersOf) {
    const roots = [];
    for (let callee in callersOf)
        functionLimits[callee] = LIMIT_UNVISITED;
    for (let caller in calleesOf) {
        if (!(caller in callersOf)) {
            functionLimits[caller] = LIMIT_UNVISITED;
            roots.push([caller, LIMIT_NONE, 'root']);
        }
    }

    return roots;
}

// Recursively traverse the callgraph from the roots. Recurse through every
// edge that weakens the limits. (Limits that entirely disappear, aka go to a
// zero intset, will be removed from functionLimits.)
function propagate_limits(worklist, functionLimits, calleesOf) {
    let top = worklist.length;
    while (top > 0) {
        // Consider caller where (graph) -> caller -> (0 or more callees)
        // 'callercaller' is for debugging.
        const [caller, edge_limits, callercaller] = worklist[--top];
        const prev_limits = functionLimits[caller];
        if (prev_limits & ~edge_limits) {
            // Turning off a limit (or unvisited marker). Must recurse to the
            // children. But first, update this caller's limits: we just found
            // out it is reachable by an unlimited path, so it must be treated
            // as unlimited (with respect to that bit).
            const new_limits = prev_limits & edge_limits;
            if (new_limits)
                functionLimits[caller] = new_limits;
            else
                delete functionLimits[caller];
            for (const {callee, limits} of (calleesOf[caller] || []))
                worklist[top++] = [callee, limits | edge_limits, caller];
        }
    }
}

// Mutually-recursive roots and their descendants will not have been visited,
// and will still be set to LIMIT_UNVISITED. Scan through and gather them.
function gather_recursive_roots(functionLimits, callersOf) {
    const roots = [];

    // 'seen' maps functions to the most recent starting function that each was
    // first reachable from, to distinguish between the current pass and passes
    // for preceding functions.
    //
    // Consider:
    //
    //   A <--> B --> C <-- D <--> E
    //                C --> F
    //                C --> G
    //
    // So there are two root cycles AB and DE, both calling C that in turn
    // calls F and G. If we start at F and scan up through callers, we will
    // keep going until A loops back to B and E loops back to D, and will add B
    // and D as roots. Then if we scan from G, we encounter C and see that it
    // was already been seen on an earlier pass. So C and everything reachable
    // from it is already reachable by some root. (We need to label nodes with
    // their pass because otherwise we couldn't distinguish "already seen C,
    // done" from "already seen B, must be a root".)
    //
    const seen = new Map();
    for (var func in functionLimits) {
        if (functionLimits[func] != LIMIT_UNVISITED)
            continue;

        // We should only be looking at nodes with callers, since otherwise
        // they would have been handled in the previous pass!
        assert(callersOf[func].length > 0);

        const work = [func];
        while (work.length > 0) {
            const f = work.pop();
            if (seen.has(f)) {
                if (seen.get(f) == func) {
                    // We have traversed a cycle and reached an already-seen
                    // node. Treat it as a root.
                    roots.push([f, LIMIT_NONE, 'root']);
                    print(`recursive root? ${f} = ${functionNames[f]}`);
                } else {
                    // Otherwise we hit the portion of the graph that is
                    // reachable from a past root.
                    seen.set(f, func);
                }
            } else {
                print(`retained by recursive root? ${f} = ${functionNames[f]}`);
                work.push(...callersOf[f]);
                seen.set(f, func);
            }
        }
    }

    return roots;
}

function remap_ids_to_mangled_names() {
    var tmp = gcFunctions;
    gcFunctions = {};
    for (const [caller, reason] of Object.entries(tmp))
        gcFunctions[functionNames[caller]] = functionNames[reason] || reason;

    tmp = calleesOf;
    calleesOf = {};
    for (const [callerId, callees] of Object.entries(calleesOf)) {
        const caller = functionNames[callerId];
        for (const {calleeId, limits} of callees)
            calleesOf[caller][functionNames[calleeId]] = limits;
    }

    tmp = callersOf;
    callersOf = {};
    for (const [calleeId, callers] of Object.entries(callersOf)) {
        const callee = functionNames[calleeId];
        callersOf[callee] = {};
        for (const {callerId, limits} of callers)
            callersOf[callee][functionNames[caller]] = limits;
    }
}