summaryrefslogtreecommitdiffstats
path: root/js/src/devtools/rootAnalysis/loadCallgraph.js
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/devtools/rootAnalysis/loadCallgraph.js')
-rw-r--r--js/src/devtools/rootAnalysis/loadCallgraph.js428
1 files changed, 428 insertions, 0 deletions
diff --git a/js/src/devtools/rootAnalysis/loadCallgraph.js b/js/src/devtools/rootAnalysis/loadCallgraph.js
new file mode 100644
index 0000000000..cfe5ab6c58
--- /dev/null
+++ b/js/src/devtools/rootAnalysis/loadCallgraph.js
@@ -0,0 +1,428 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
+
+"use strict";
+
+loadRelativeToScript('utility.js');
+
+// Functions come out of sixgill in the form "mangled$readable". The mangled
+// name is Truth. One mangled name might correspond to multiple readable names,
+// for multiple reasons, including (1) sixgill/gcc doesn't always qualify types
+// the same way or de-typedef the same amount; (2) sixgill's output treats
+// references and pointers the same, and so doesn't distinguish them, but C++
+// treats them as separate for overloading and linking; (3) (identical)
+// destructors sometimes have an int32 parameter, sometimes not.
+//
+// The readable names are useful because they're far more meaningful to the
+// user, and are what should show up in reports and questions to mrgiggles. At
+// least in most cases, it's fine to have the extra mangled name tacked onto
+// the beginning for these.
+//
+// The strategy used is to separate out the pieces whenever they are read in,
+// create a table mapping mangled names to all readable names, and use the
+// mangled names in all computation -- except for limited circumstances when
+// the readable name is used in annotations.
+//
+// Note that callgraph.txt uses a compressed representation -- each name is
+// mapped to an integer, and those integers are what is recorded in the edges.
+// But the integers depend on the full name, whereas the true edge should only
+// consider the mangled name. And some of the names encoded in callgraph.txt
+// are FieldCalls, not just function names.
+
+var readableNames = {}; // map from mangled name => list of readable names
+var calleesOf = {}; // map from mangled => list of tuples of {'callee':mangled, 'limits':intset}
+var callersOf; // map from mangled => list of tuples of {'caller':mangled, 'limits':intset}
+var gcFunctions = {}; // map from mangled callee => reason
+var limitedFunctions = {}; // set of mangled names (map from mangled name => limit intset)
+var gcEdges = {};
+
+// "Map" from identifier to mangled name, or sometimes to a Class.Field name.
+var functionNames = [""];
+
+var mangledToId = {};
+
+// Returns whether the function was added. (It will be refused if it was
+// already there, or if limits or annotations say it shouldn't be added.)
+function addGCFunction(caller, reason, functionLimits)
+{
+ if (functionLimits[caller] & LIMIT_CANNOT_GC)
+ return false;
+
+ if (ignoreGCFunction(functionNames[caller]))
+ return false;
+
+ if (!(caller in gcFunctions)) {
+ gcFunctions[caller] = reason;
+ return true;
+ }
+
+ return false;
+}
+
+// Every caller->callee callsite is associated with a limit saying what is
+// allowed at that callsite (eg if it's in a GC suppression zone, it would have
+// LIMIT_CANNOT_GC set.) A given caller might call the same callee multiple
+// times, with different limits, so we want to associate the <caller,callee>
+// edge with the intersection ('AND') of all of the callsites' limits.
+//
+// Scan through all call edges and intersect the limits for all matching
+// <caller,callee> edges (so that the result is the least limiting of all
+// matching edges.) Preserve the original order.
+//
+// During the same scan, build callersOf from calleesOf.
+function merge_repeated_calls(calleesOf) {
+ const callersOf = Object.create(null);
+
+ for (const [caller, callee_limits] of Object.entries(calleesOf)) {
+ const ordered_callees = [];
+
+ // callee_limits is a list of {callee,limit} objects.
+ const callee2limit = new Map();
+ for (const {callee, limits} of callee_limits) {
+ const prev_limits = callee2limit.get(callee);
+ if (prev_limits === undefined) {
+ callee2limit.set(callee, limits);
+ ordered_callees.push(callee);
+ } else {
+ callee2limit.set(callee, prev_limits & limits);
+ }
+ }
+
+ // Update the contents of callee_limits to contain a single entry for
+ // each callee, with its limits set to the AND of the limits observed
+ // at all callsites within this caller function.
+ callee_limits.length = 0;
+ for (const callee of ordered_callees) {
+ const limits = callee2limit.get(callee);
+ callee_limits.push({callee, limits});
+ if (!(callee in callersOf))
+ callersOf[callee] = [];
+ callersOf[callee].push({caller, limits});
+ }
+ }
+
+ return callersOf;
+}
+
+function loadCallgraph(file)
+{
+ const fieldCallLimits = {};
+ const fieldCallCSU = new Map(); // map from full field name id => csu name
+ const resolvedFieldCalls = new Set();
+
+ // set of mangled names (map from mangled name => limit intset)
+ var functionLimits = {};
+
+ let numGCCalls = 0;
+
+ for (let line of readFileLines_gen(file)) {
+ line = line.replace(/\n/, "");
+
+ let match;
+ if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) {
+ const [ _, id, mangled ] = match;
+ assert(functionNames.length == id);
+ functionNames.push(mangled);
+ mangledToId[mangled] = id;
+ continue;
+ }
+ if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) {
+ const [ _, id, readable ] = match;
+ const mangled = functionNames[id];
+ if (mangled in readableNames)
+ readableNames[mangled].push(readable);
+ else
+ readableNames[mangled] = [ readable ];
+ continue;
+ }
+
+ let limits = 0;
+ // Example line: D /17 6 7
+ //
+ // This means a direct call from 6 -> 7, but within a scope that
+ // applies limits 0x1 and 0x10 to the callee.
+ //
+ // Look for a limit and remove it from the line if found.
+ if (line.indexOf("/") != -1) {
+ match = /^(..)\/(\d+) (.*)/.exec(line);
+ line = match[1] + match[3];
+ limits = match[2]|0;
+ }
+ const tag = line.charAt(0);
+ if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) {
+ const caller = match[1]|0;
+ const name = match[2];
+ if (!indirectCallCannotGC(functionNames[caller], name) &&
+ !(limits & LIMIT_CANNOT_GC))
+ {
+ addGCFunction(caller, "IndirectCall: " + name, functionLimits);
+ }
+ } else if (match = (tag == 'F' || tag == 'V') && /^[FV] (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) {
+ const caller = match[1]|0;
+ const fullfield = match[2]|0;
+ const csu = match[3];
+ const fullfield_str = csu + "." + match[4];
+ assert(functionNames[fullfield] == fullfield_str);
+ if (limits)
+ fieldCallLimits[fullfield] = limits;
+ addToKeyedList(calleesOf, caller, {callee:fullfield, limits});
+ fieldCallCSU.set(fullfield, csu);
+ } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) {
+ const caller = match[1]|0;
+ const callee = match[2]|0;
+ addToKeyedList(calleesOf, caller, {callee:callee, limits:limits});
+ } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) {
+ const callerField = match[1]|0;
+ const callee = match[2]|0;
+ // Resolved virtual functions create a dummy node for the field
+ // call, and callers call it. It will then call all possible
+ // instantiations. No additional limits are placed on the callees;
+ // it's as if there were a function named BaseClass.foo:
+ //
+ // void BaseClass.foo() {
+ // Subclass1::foo();
+ // Subclass2::foo();
+ // }
+ //
+ addToKeyedList(calleesOf, callerField, {callee:callee, limits:0});
+ // Mark that we resolved this virtual method, so that it isn't
+ // assumed to call some random function that might do anything.
+ resolvedFieldCalls.add(callerField);
+ } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) {
+ const id = match[1]|0;
+ let tag = match[2];
+ if (tag == 'GC Call') {
+ addGCFunction(id, "GC", functionLimits);
+ numGCCalls++;
+ }
+ } else {
+ assert(false, "Invalid format in callgraph line: " + line);
+ }
+ }
+
+ // Callers have a list of callees, with duplicates (if the same function is
+ // called more than once.) Merge the repeated calls, only keeping limits
+ // that are in force for *every* callsite of that callee. Also, generate
+ // the callersOf table at the same time.
+ callersOf = merge_repeated_calls(calleesOf);
+
+ // Add in any extra functions at the end. (If we did this early, it would
+ // mess up the id <-> name correspondence. Also, we need to know if the
+ // functions even exist in the first place.)
+ for (var func of extraGCFunctions()) {
+ addGCFunction(mangledToId[func], "annotation", functionLimits);
+ }
+
+ // Compute functionLimits: it should contain the set of functions that
+ // are *always* called within some sort of limited context (eg GC
+ // suppression).
+
+ // Initialize to limited field calls.
+ for (var [name, limits] of Object.entries(fieldCallLimits)) {
+ if (limits)
+ functionLimits[name] = limits;
+ }
+
+ // Initialize functionLimits to the set of all functions, where each one is
+ // maximally limited, and return a worklist containing all simple roots
+ // (nodes with no callers).
+ var roots = gather_simple_roots(functionLimits, callersOf);
+
+ // Traverse the graph, spreading the limits down from the roots.
+ propagate_limits(roots, functionLimits, calleesOf);
+
+ // There are a surprising number of "recursive roots", where there is a
+ // cycle of functions calling each other but not called by anything else,
+ // and these roots may also have descendants. Now that the above traversal
+ // has eliminated everything reachable from simple roots, traverse the
+ // remaining graph to gather up a representative function from each root
+ // cycle.
+ roots = gather_recursive_roots(roots, functionLimits, callersOf);
+
+ // And do a final traversal starting with the recursive roots.
+ propagate_limits(roots, functionLimits, calleesOf);
+
+ // Eliminate GC-limited functions from the set of functions known to GC.
+ for (var name in gcFunctions) {
+ if (functionLimits[name] & LIMIT_CANNOT_GC)
+ delete gcFunctions[name];
+ }
+
+ // functionLimits should now contain all functions that are always called
+ // in a limited context.
+
+ // Sanity check to make sure the callgraph has some functions annotated as
+ // GC Calls. This is mostly a check to be sure the earlier processing
+ // succeeded (as opposed to, say, running on empty xdb files because you
+ // didn't actually compile anything interesting.)
+ assert(numGCCalls > 0, "No GC functions found!");
+
+ // Initialize the worklist to all known gcFunctions.
+ var worklist = [];
+ for (const name in gcFunctions)
+ worklist.push(name);
+
+ // Include all field calls and unresolved virtual method calls.
+ for (const [name, csuName] of fieldCallCSU) {
+ if (resolvedFieldCalls.has(name))
+ continue; // Skip resolved virtual functions.
+ const fullFieldName = functionNames[name];
+ if (!fieldCallCannotGC(csuName, fullFieldName)) {
+ gcFunctions[name] = 'unresolved ' + fullFieldName;
+ worklist.push(name);
+ }
+ }
+
+ // Recursively find all callers not always called in a GC suppression
+ // context, and add them to the set of gcFunctions.
+ while (worklist.length) {
+ name = worklist.shift();
+ assert(name in gcFunctions, "gcFunctions does not contain " + name);
+ if (!(name in callersOf))
+ continue;
+ for (const {caller, limits} of callersOf[name]) {
+ if (!(limits & LIMIT_CANNOT_GC)) {
+ if (addGCFunction(caller, name, functionLimits))
+ worklist.push(caller);
+ }
+ }
+ }
+
+ // Convert functionLimits to limitedFunctions (using mangled names instead
+ // of ids.)
+
+ for (const [id, limits] of Object.entries(functionLimits))
+ limitedFunctions[functionNames[id]] = limits;
+
+ // The above code uses integer ids for efficiency. External code uses
+ // mangled names. Rewrite the various data structures to convert ids to
+ // mangled names.
+ remap_ids_to_mangled_names();
+}
+
+// Return a worklist of functions with no callers, and also initialize
+// functionLimits to the set of all functions, each mapped to LIMIT_UNVISTED.
+function gather_simple_roots(functionLimits, callersOf) {
+ const roots = [];
+ for (let callee in callersOf)
+ functionLimits[callee] = LIMIT_UNVISITED;
+ for (let caller in calleesOf) {
+ if (!(caller in callersOf)) {
+ functionLimits[caller] = LIMIT_UNVISITED;
+ roots.push([caller, LIMIT_NONE, 'root']);
+ }
+ }
+
+ return roots;
+}
+
+// Recursively traverse the callgraph from the roots. Recurse through every
+// edge that weakens the limits. (Limits that entirely disappear, aka go to a
+// zero intset, will be removed from functionLimits.)
+function propagate_limits(worklist, functionLimits, calleesOf) {
+ let top = worklist.length;
+ while (top > 0) {
+ // Consider caller where (graph) -> caller -> (0 or more callees)
+ // 'callercaller' is for debugging.
+ const [caller, edge_limits, callercaller] = worklist[--top];
+ const prev_limits = functionLimits[caller];
+ if (prev_limits & ~edge_limits) {
+ // Turning off a limit (or unvisited marker). Must recurse to the
+ // children. But first, update this caller's limits: we just found
+ // out it is reachable by an unlimited path, so it must be treated
+ // as unlimited (with respect to that bit).
+ const new_limits = prev_limits & edge_limits;
+ if (new_limits)
+ functionLimits[caller] = new_limits;
+ else
+ delete functionLimits[caller];
+ for (const {callee, limits} of (calleesOf[caller] || []))
+ worklist[top++] = [callee, limits | edge_limits, caller];
+ }
+ }
+}
+
+// Mutually-recursive roots and their descendants will not have been visited,
+// and will still be set to LIMIT_UNVISITED. Scan through and gather them.
+function gather_recursive_roots(functionLimits, callersOf) {
+ const roots = [];
+
+ // 'seen' maps functions to the most recent starting function that each was
+ // first reachable from, to distinguish between the current pass and passes
+ // for preceding functions.
+ //
+ // Consider:
+ //
+ // A <--> B --> C <-- D <--> E
+ // C --> F
+ // C --> G
+ //
+ // So there are two root cycles AB and DE, both calling C that in turn
+ // calls F and G. If we start at F and scan up through callers, we will
+ // keep going until A loops back to B and E loops back to D, and will add B
+ // and D as roots. Then if we scan from G, we encounter C and see that it
+ // was already been seen on an earlier pass. So C and everything reachable
+ // from it is already reachable by some root. (We need to label nodes with
+ // their pass because otherwise we couldn't distinguish "already seen C,
+ // done" from "already seen B, must be a root".)
+ //
+ const seen = new Map();
+ for (var func in functionLimits) {
+ if (functionLimits[func] != LIMIT_UNVISITED)
+ continue;
+
+ // We should only be looking at nodes with callers, since otherwise
+ // they would have been handled in the previous pass!
+ assert(callersOf[func].length > 0);
+
+ const work = [func];
+ while (work.length > 0) {
+ const f = work.pop();
+ if (seen.has(f)) {
+ if (seen.get(f) == func) {
+ // We have traversed a cycle and reached an already-seen
+ // node. Treat it as a root.
+ roots.push([f, LIMIT_NONE, 'root']);
+ print(`recursive root? ${f} = ${functionNames[f]}`);
+ } else {
+ // Otherwise we hit the portion of the graph that is
+ // reachable from a past root.
+ seen.set(f, func);
+ }
+ } else {
+ print(`retained by recursive root? ${f} = ${functionNames[f]}`);
+ work.push(...callersOf[f]);
+ seen.set(f, func);
+ }
+ }
+ }
+
+ return roots;
+}
+
+function remap_ids_to_mangled_names() {
+ var tmp = gcFunctions;
+ gcFunctions = {};
+ for (const [caller, reason] of Object.entries(tmp))
+ gcFunctions[functionNames[caller]] = functionNames[reason] || reason;
+
+ tmp = calleesOf;
+ calleesOf = {};
+ for (const [callerId, callees] of Object.entries(calleesOf)) {
+ const caller = functionNames[callerId];
+ for (const {calleeId, limits} of callees)
+ calleesOf[caller][functionNames[calleeId]] = limits;
+ }
+
+ tmp = callersOf;
+ callersOf = {};
+ for (const [calleeId, callers] of Object.entries(callersOf)) {
+ const callee = functionNames[calleeId];
+ callersOf[callee] = {};
+ for (const {callerId, limits} of callers)
+ callersOf[callee][functionNames[caller]] = limits;
+ }
+}