1 files changed, 428 insertions, 0 deletions
diff --git a/js/src/devtools/rootAnalysis/loadCallgraph.js b/js/src/devtools/rootAnalysis/loadCallgraph.js
new file mode 100644
index 0000000000..cfe5ab6c58
--- /dev/null
+++ b/js/src/devtools/rootAnalysis/loadCallgraph.js
@@ -0,0 +1,428 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
+
+"use strict";
+
+loadRelativeToScript('utility.js');
+
+// Functions come out of sixgill in the form "mangled$readable". The mangled
+// name is Truth. One mangled name might correspond to multiple readable names,
+// for multiple reasons, including (1) sixgill/gcc doesn't always qualify types
+// the same way or de-typedef the same amount; (2) sixgill's output treats
+// references and pointers the same, and so doesn't distinguish them, but C++
+// treats them as separate for overloading and linking; (3) (identical)
+// destructors sometimes have an int32 parameter, sometimes not.
+//
+// The readable names are useful because they're far more meaningful to the
+// user, and are what should show up in reports and questions to mrgiggles. At
+// least in most cases, it's fine to have the extra mangled name tacked onto
+// the beginning for these.
+//
+// The strategy used is to separate out the pieces whenever they are read in,
+// create a table mapping mangled names to all readable names, and use the
+// mangled names in all computation -- except for limited circumstances when
+// the readable name is used in annotations.
+//
+// Note that callgraph.txt uses a compressed representation -- each name is
+// mapped to an integer, and those integers are what is recorded in the edges.
+// But the integers depend on the full name, whereas the true edge should only
+// consider the mangled name. And some of the names encoded in callgraph.txt
+// are FieldCalls, not just function names.
+
+var readableNames = {}; // map from mangled name => list of readable names
+var calleesOf = {}; // map from mangled => list of tuples of {'callee':mangled, 'limits':intset}
+var callersOf; // map from mangled => list of tuples of {'caller':mangled, 'limits':intset}
+var gcFunctions = {}; // map from mangled callee => reason
+var limitedFunctions = {}; // set of mangled names (map from mangled name => limit intset)
+var gcEdges = {};
+
+// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
+var functionNames = [""];
+
+var mangledToId = {};
+
+// Returns whether the function was added. (It will be refused if it was
+// already there, or if limits or annotations say it shouldn't be added.)
+function addGCFunction(caller, reason, functionLimits)
+{
+    if (functionLimits[caller] & LIMIT_CANNOT_GC)
+        return false;
+
+    if (ignoreGCFunction(functionNames[caller]))
+        return false;
+
+    if (!(caller in gcFunctions)) {
+        gcFunctions[caller] = reason;
+        return true;
+    }
+
+    return false;
+}
+
+// Every caller->callee callsite is associated with a limit saying what is
+// allowed at that callsite (eg if it's in a GC suppression zone, it would have
+// LIMIT_CANNOT_GC set.) A given caller might call the same callee multiple
+// times, with different limits, so we want to associate the <caller,callee>
+// edge with the intersection ('AND') of all of the callsites' limits.
+//
+// Scan through all call edges and intersect the limits for all matching
+// <caller,callee> edges (so that the result is the least limiting of all
+// matching edges.) Preserve the original order.
+//
+// During the same scan, build callersOf from calleesOf.
+function merge_repeated_calls(calleesOf) {
+    const callersOf = Object.create(null);
+
+    for (const [caller, callee_limits] of Object.entries(calleesOf)) {
+        const ordered_callees = [];
+
+        // callee_limits is a list of {callee,limit} objects.
+        const callee2limit = new Map();
+        for (const {callee, limits} of callee_limits) {
+            const prev_limits = callee2limit.get(callee);
+            if (prev_limits === undefined) {
+                callee2limit.set(callee, limits);
+                ordered_callees.push(callee);
+            } else {
+                callee2limit.set(callee, prev_limits & limits);
+            }
+        }
+
+        // Update the contents of callee_limits to contain a single entry for
+        // each callee, with its limits set to the AND of the limits observed
+        // at all callsites within this caller function.
+        callee_limits.length = 0;
+        for (const callee of ordered_callees) {
+            const limits = callee2limit.get(callee);
+            callee_limits.push({callee, limits});
+            if (!(callee in callersOf))
+                callersOf[callee] = [];
+            callersOf[callee].push({caller, limits});
+        }
+    }
+
+    return callersOf;
+}
+
+function loadCallgraph(file)
+{
+    const fieldCallLimits = {};
+    const fieldCallCSU = new Map(); // map from full field name id => csu name
+    const resolvedFieldCalls = new Set();
+
+    // set of mangled names (map from mangled name => limit intset)
+    var functionLimits = {};
+
+    let numGCCalls = 0;
+
+    for (let line of readFileLines_gen(file)) {
+        line = line.replace(/\n/, "");
+
+        let match;
+        if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
+            const [ _, id, mangled ] = match;
+            assert(functionNames.length == id);
+            functionNames.push(mangled);
+            mangledToId[mangled] = id;
+            continue;
+        }
+        if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
+            const [ _, id, readable ] = match;
+            const mangled = functionNames[id];
+            if (mangled in readableNames)
+                readableNames[mangled].push(readable);
+            else
+                readableNames[mangled] = [ readable ];
+            continue;
+        }
+
+        let limits = 0;
+        // Example line: D /17 6 7
+        //
+        // This means a direct call from 6 -> 7, but within a scope that
+        // applies limits 0x1 and 0x10 to the callee.
+        //
+        // Look for a limit and remove it from the line if found.
+        if (line.indexOf("/") != -1) {
+            match = /^(..)\/(\d+) (.*)/.exec(line);
+            line = match[1] + match[3];
+            limits = match[2]|0;
+        }
+        const tag = line.charAt(0);
+        if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) {
+            const caller = match[1]|0;
+            const name = match[2];
+            if (!indirectCallCannotGC(functionNames[caller], name) &&
+                !(limits & LIMIT_CANNOT_GC))
+            {
+                addGCFunction(caller, "IndirectCall: " + name, functionLimits);
+            }
+        } else if (match = (tag == 'F' || tag == 'V') && /^[FV] (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
+            const caller = match[1]|0;
+            const fullfield = match[2]|0;
+            const csu = match[3];
+            const fullfield_str = csu + "." + match[4];
+            assert(functionNames[fullfield] == fullfield_str);
+            if (limits)
+                fieldCallLimits[fullfield] = limits;
+            addToKeyedList(calleesOf, caller, {callee:fullfield, limits});
+            fieldCallCSU.set(fullfield, csu);
+        } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) {
+            const caller = match[1]|0;
+            const callee = match[2]|0;
+            addToKeyedList(calleesOf, caller, {callee:callee, limits:limits});
+        } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) {
+            const callerField = match[1]|0;
+            const callee = match[2]|0;
+            // Resolved virtual functions create a dummy node for the field
+            // call, and callers call it. It will then call all possible
+            // instantiations. No additional limits are placed on the callees;
+            // it's as if there were a function named BaseClass.foo:
+            //
+            //     void BaseClass.foo() {
+            //         Subclass1::foo();
+            //         Subclass2::foo();
+            //     }
+            //
+            addToKeyedList(calleesOf, callerField, {callee:callee, limits:0});
+            // Mark that we resolved this virtual method, so that it isn't
+            // assumed to call some random function that might do anything.
+            resolvedFieldCalls.add(callerField);
+        } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
+            const id = match[1]|0;
+            let tag = match[2];
+            if (tag == 'GC Call') {
+                addGCFunction(id, "GC", functionLimits);
+                numGCCalls++;
+            }
+        } else {
+            assert(false, "Invalid format in callgraph line: " + line);
+        }
+    }
+
+    // Callers have a list of callees, with duplicates (if the same function is
+    // called more than once.) Merge the repeated calls, only keeping limits
+    // that are in force for *every* callsite of that callee. Also, generate
+    // the callersOf table at the same time.
+    callersOf = merge_repeated_calls(calleesOf);
+
+    // Add in any extra functions at the end. (If we did this early, it would
+    // mess up the id <-> name correspondence. Also, we need to know if the
+    // functions even exist in the first place.)
+    for (var func of extraGCFunctions()) {
+        addGCFunction(mangledToId[func], "annotation", functionLimits);
+    }
+
+    // Compute functionLimits: it should contain the set of functions that
+    // are *always* called within some sort of limited context (eg GC
+    // suppression).
+
+    // Initialize to limited field calls.
+    for (var [name, limits] of Object.entries(fieldCallLimits)) {
+        if (limits)
+            functionLimits[name] = limits;
+    }
+
+    // Initialize functionLimits to the set of all functions, where each one is
+    // maximally limited, and return a worklist containing all simple roots
+    // (nodes with no callers).
+    var roots = gather_simple_roots(functionLimits, callersOf);
+
+    // Traverse the graph, spreading the limits down from the roots.
+    propagate_limits(roots, functionLimits, calleesOf);
+
+    // There are a surprising number of "recursive roots", where there is a
+    // cycle of functions calling each other but not called by anything else,
+    // and these roots may also have descendants. Now that the above traversal
+    // has eliminated everything reachable from simple roots, traverse the
+    // remaining graph to gather up a representative function from each root
+    // cycle.
+    roots = gather_recursive_roots(roots, functionLimits, callersOf);
+
+    // And do a final traversal starting with the recursive roots.
+    propagate_limits(roots, functionLimits, calleesOf);
+
+    // Eliminate GC-limited functions from the set of functions known to GC.
+    for (var name in gcFunctions) {
+        if (functionLimits[name] & LIMIT_CANNOT_GC)
+            delete gcFunctions[name];
+    }
+
+    // functionLimits should now contain all functions that are always called
+    // in a limited context.
+
+    // Sanity check to make sure the callgraph has some functions annotated as
+    // GC Calls. This is mostly a check to be sure the earlier processing
+    // succeeded (as opposed to, say, running on empty xdb files because you
+    // didn't actually compile anything interesting.)
+    assert(numGCCalls > 0, "No GC functions found!");
+
+    // Initialize the worklist to all known gcFunctions.
+    var worklist = [];
+    for (const name in gcFunctions)
+        worklist.push(name);
+
+    // Include all field calls and unresolved virtual method calls.
+    for (const [name, csuName] of fieldCallCSU) {
+        if (resolvedFieldCalls.has(name))
+            continue; // Skip resolved virtual functions.
+        const fullFieldName = functionNames[name];
+        if (!fieldCallCannotGC(csuName, fullFieldName)) {
+            gcFunctions[name] = 'unresolved ' + fullFieldName;
+            worklist.push(name);
+        }
+    }
+
+    // Recursively find all callers not always called in a GC suppression
+    // context, and add them to the set of gcFunctions.
+    while (worklist.length) {
+        name = worklist.shift();
+        assert(name in gcFunctions, "gcFunctions does not contain " + name);
+        if (!(name in callersOf))
+            continue;
+        for (const {caller, limits} of callersOf[name]) {
+            if (!(limits & LIMIT_CANNOT_GC)) {
+                if (addGCFunction(caller, name, functionLimits))
+                    worklist.push(caller);
+            }
+        }
+    }
+
+    // Convert functionLimits to limitedFunctions (using mangled names instead
+    // of ids.)
+
+    for (const [id, limits] of Object.entries(functionLimits))
+        limitedFunctions[functionNames[id]] = limits;
+
+    // The above code uses integer ids for efficiency. External code uses
+    // mangled names. Rewrite the various data structures to convert ids to
+    // mangled names.
+    remap_ids_to_mangled_names();
+}
+
+// Return a worklist of functions with no callers, and also initialize
+// functionLimits to the set of all functions, each mapped to LIMIT_UNVISTED.
+function gather_simple_roots(functionLimits, callersOf) {
+    const roots = [];
+    for (let callee in callersOf)
+        functionLimits[callee] = LIMIT_UNVISITED;
+    for (let caller in calleesOf) {
+        if (!(caller in callersOf)) {
+            functionLimits[caller] = LIMIT_UNVISITED;
+            roots.push([caller, LIMIT_NONE, 'root']);
+        }
+    }
+
+    return roots;
+}
+
+// Recursively traverse the callgraph from the roots. Recurse through every
+// edge that weakens the limits. (Limits that entirely disappear, aka go to a
+// zero intset, will be removed from functionLimits.)
+function propagate_limits(worklist, functionLimits, calleesOf) {
+    let top = worklist.length;
+    while (top > 0) {
+        // Consider caller where (graph) -> caller -> (0 or more callees)
+        // 'callercaller' is for debugging.
+        const [caller, edge_limits, callercaller] = worklist[--top];
+        const prev_limits = functionLimits[caller];
+        if (prev_limits & ~edge_limits) {
+            // Turning off a limit (or unvisited marker). Must recurse to the
+            // children. But first, update this caller's limits: we just found
+            // out it is reachable by an unlimited path, so it must be treated
+            // as unlimited (with respect to that bit).
+            const new_limits = prev_limits & edge_limits;
+            if (new_limits)
+                functionLimits[caller] = new_limits;
+            else
+                delete functionLimits[caller];
+            for (const {callee, limits} of (calleesOf[caller] || []))
+                worklist[top++] = [callee, limits | edge_limits, caller];
+        }
+    }
+}
+
+// Mutually-recursive roots and their descendants will not have been visited,
+// and will still be set to LIMIT_UNVISITED. Scan through and gather them.
+function gather_recursive_roots(functionLimits, callersOf) {
+    const roots = [];
+
+    // 'seen' maps functions to the most recent starting function that each was
+    // first reachable from, to distinguish between the current pass and passes
+    // for preceding functions.
+    //
+    // Consider:
+    //
+    //   A <--> B --> C <-- D <--> E
+    //                C --> F
+    //                C --> G
+    //
+    // So there are two root cycles AB and DE, both calling C that in turn
+    // calls F and G. If we start at F and scan up through callers, we will
+    // keep going until A loops back to B and E loops back to D, and will add B
+    // and D as roots. Then if we scan from G, we encounter C and see that it
+    // was already been seen on an earlier pass. So C and everything reachable
+    // from it is already reachable by some root. (We need to label nodes with
+    // their pass because otherwise we couldn't distinguish "already seen C,
+    // done" from "already seen B, must be a root".)
+    //
+    const seen = new Map();
+    for (var func in functionLimits) {
+        if (functionLimits[func] != LIMIT_UNVISITED)
+            continue;
+
+        // We should only be looking at nodes with callers, since otherwise
+        // they would have been handled in the previous pass!
+        assert(callersOf[func].length > 0);
+
+        const work = [func];
+        while (work.length > 0) {
+            const f = work.pop();
+            if (seen.has(f)) {
+                if (seen.get(f) == func) {
+                    // We have traversed a cycle and reached an already-seen
+                    // node. Treat it as a root.
+                    roots.push([f, LIMIT_NONE, 'root']);
+                    print(`recursive root? ${f} = ${functionNames[f]}`);
+                } else {
+                    // Otherwise we hit the portion of the graph that is
+                    // reachable from a past root.
+                    seen.set(f, func);
+                }
+            } else {
+                print(`retained by recursive root? ${f} = ${functionNames[f]}`);
+                work.push(...callersOf[f]);
+                seen.set(f, func);
+            }
+        }
+    }
+
+    return roots;
+}
+
+function remap_ids_to_mangled_names() {
+    var tmp = gcFunctions;
+    gcFunctions = {};
+    for (const [caller, reason] of Object.entries(tmp))
+        gcFunctions[functionNames[caller]] = functionNames[reason] || reason;
+
+    tmp = calleesOf;
+    calleesOf = {};
+    for (const [callerId, callees] of Object.entries(calleesOf)) {
+        const caller = functionNames[callerId];
+        for (const {calleeId, limits} of callees)
+            calleesOf[caller][functionNames[calleeId]] = limits;
+    }
+
+    tmp = callersOf;
+    callersOf = {};
+    for (const [calleeId, callers] of Object.entries(callersOf)) {
+        const callee = functionNames[calleeId];
+        callersOf[callee] = {};
+        for (const {callerId, limits} of callers)
+            callersOf[callee][functionNames[caller]] = limits;
+    }
+}