/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ "use strict"; loadRelativeToScript('utility.js'); loadRelativeToScript('callgraph.js'); // Functions come out of sixgill in the form "mangled$readable". The mangled // name is Truth. One mangled name might correspond to multiple readable names, // for multiple reasons, including (1) sixgill/gcc doesn't always qualify types // the same way or de-typedef the same amount; (2) sixgill's output treats // references and pointers the same, and so doesn't distinguish them, but C++ // treats them as separate for overloading and linking; (3) (identical) // destructors sometimes have an int32 parameter, sometimes not. // // The readable names are useful because they're far more meaningful to the // user, and are what should show up in reports and questions to mrgiggles. At // least in most cases, it's fine to have the extra mangled name tacked onto // the beginning for these. // // The strategy used is to separate out the pieces whenever they are read in, // create a table mapping mangled names to all readable names, and use the // mangled names in all computation -- except for limited circumstances when // the readable name is used in annotations. // // Note that callgraph.txt uses a compressed representation -- each name is // mapped to an integer, and those integers are what is recorded in the edges. // But the integers depend on the full name, whereas the true edge should only // consider the mangled name. And some of the names encoded in callgraph.txt // are FieldCalls, not just function names. var gcEdges = {}; // Returns whether the function was added. (It will be refused if it was // already there, or if attrs or annotations say it shouldn't be added.) function addGCFunction(caller, reason, gcFunctions, functionAttrs, functions) { if (functionAttrs[caller] && functionAttrs[caller][1] & ATTR_GC_SUPPRESSED) return false; if (ignoreGCFunction(functions.name[caller], functions.readableName)) return false; if (!(caller in gcFunctions)) { gcFunctions[caller] = reason; return true; } return false; } // Every caller->callee callsite is associated with attrs saying what is // allowed at that callsite (eg if it's in a GC suppression zone, it would have // ATTR_GC_SUPPRESSED set.) A given caller might call the same callee multiple // times, with different attributes. Associate the edge with // the intersection (AND) and disjunction (OR) of all of the callsites' attrs. // The AND ('all') says what attributes are present for all callers; the OR // ('any') says what attributes are present on any caller. Preserve the // original order. // // During the same scan, build callersOf from calleesOf. function generate_callgraph(rawCallees) { const callersOf = new Map(); const calleesOf = new Map(); for (const [caller, callee_attrs] of rawCallees) { const ordered_callees = []; // callee_attrs is a list of {callee,any,all} objects. const callee2any = new Map(); const callee2all = new Map(); for (const {callee, any, all} of callee_attrs) { const prev_any = callee2any.get(callee); if (prev_any === undefined) { assert(!callee2all.has(callee)); callee2any.set(callee, any); callee2all.set(callee, all); ordered_callees.push(callee); } else { const prev_all = callee2all.get(callee); callee2any.set(callee, prev_any | any); callee2all.set(callee, prev_all & all); } } // Update the contents of callee_attrs to contain a single entry for // each callee, with its attrs set to the AND of the attrs observed at // all callsites within this caller function. callee_attrs.length = 0; for (const callee of ordered_callees) { const any = callee2any.get(callee); const all = callee2all.get(callee); if (!calleesOf.has(caller)) calleesOf.set(caller, new Map()); calleesOf.get(caller).set(callee, {any, all}); if (!callersOf.has(callee)) callersOf.set(callee, new Map()); callersOf.get(callee).set(caller, {any, all}); } } return {callersOf, calleesOf}; } // Returns object mapping mangled => reason for GCing function loadRawCallgraphFile(file, verbose) { const functions = { // "Map" from identifier to mangled name, or sometimes to a Class.Field name. name: [""], // map from mangled name => list of readable names readableName: {}, mangledToId: {} }; const fieldCallAttrs = {}; const fieldCallCSU = new Map(); // map from full field name id => csu name // set of mangled names (map from mangled name => [any,all]) var functionAttrs = {}; const gcCalls = []; const indirectCalls = []; // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset} const rawCallees = new Map(); for (let line of readFileLines_gen(file)) { line = line.replace(/\n/, ""); let match; if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) { const [ _, id, mangled ] = match; assert(functions.name.length == id); functions.name.push(mangled); functions.mangledToId[mangled] = id|0; continue; } if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) { const [ _, id, readable ] = match; const mangled = functions.name[id]; if (mangled in functions.readableName) functions.readableName[mangled].push(readable); else functions.readableName[mangled] = [ readable ]; continue; } let attrs = 0; // Example line: D /17 6 7 // // This means a direct call from 6 -> 7, but within a scope that // applies attrs 0x1 and 0x10 to the callee. // // Look for a bit specifier and remove it from the line if found. if (line.indexOf("/") != -1) { match = /^(..)\/(\d+) (.*)/.exec(line); line = match[1] + match[3]; attrs = match[2]|0; } const tag = line.charAt(0); if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) { const caller = match[1]|0; const name = match[2]; if (indirectCallCannotGC(functions.name[caller], name)) attrs |= ATTR_GC_SUPPRESSED; indirectCalls.push([caller, "IndirectCall: " + name, attrs]); } else if (match = tag == 'F' && /^F (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { const caller = match[1]|0; const fullfield = match[2]|0; const csu = match[3]; const fullfield_str = csu + "." + match[4]; assert(functions.name[fullfield] == fullfield_str); if (attrs) fieldCallAttrs[fullfield] = attrs; addToMappedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs}); fieldCallCSU.set(fullfield, csu); if (fieldCallCannotGC(csu, fullfield_str)) addToMappedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0}); else addToMappedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0}); } else if (match = tag == 'V' && /^V (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { // V tag is no longer used, but we are still emitting it becasue it // can be helpful to understand what's going on. } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) { const caller = match[1]|0; const callee = match[2]|0; addToMappedList(rawCallees, caller, {callee, any:attrs, all:attrs}); } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) { assert(false, "R tag is no longer used"); } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) { const id = match[1]|0; let tag = match[2]; if (tag == 'GC Call') gcCalls.push(id); } else { assert(false, "Invalid format in callgraph line: " + line); } } if (verbose) { printErr("Loaded[verbose=" + verbose + "] " + file); } return { fieldCallAttrs, fieldCallCSU, gcCalls, indirectCalls, rawCallees, functions }; } // Take a set of rawcalls filenames (as in, the raw callgraph data output by // computeCallgraph.js) and combine them into a global callgraph, renumbering // the IDs as needed. function mergeRawCallgraphs(filenames, verbose) { let d; for (const filename of filenames) { const raw = loadRawCallgraphFile(filename, verbose); if (!d) { d = raw; continue; } const { fieldCallAttrs, fieldCallCSU, gcCalls, indirectCalls, rawCallees, functions } = raw; // Compute the ID mapping. Incoming functions that already have an ID // will be mapped to that ID; new ones will allocate a fresh ID. const remap = new Array(functions.name.length); for (let i = 1; i < functions.name.length; i++) { const mangled = functions.name[i]; const old_id = d.functions.mangledToId[mangled] if (old_id) { remap[i] = old_id; } else { const newid = d.functions.name.length; d.functions.mangledToId[mangled] = newid; d.functions.name.push(mangled); remap[i] = newid; assert(!(mangled in d.functions.readableName), mangled + " readable name is already found"); const readables = functions.readableName[mangled]; if (readables !== undefined) d.functions.readableName[mangled] = readables; } } for (const [fullfield, attrs] of Object.entries(fieldCallAttrs)) d.fieldCallAttrs[remap[fullfield]] = attrs; for (const [fullfield, csu] of fieldCallCSU.entries()) d.fieldCallCSU.set(remap[fullfield], csu); for (const call of gcCalls) d.gcCalls.push(remap[call]); for (const [caller, name, attrs] of indirectCalls) d.indirectCalls.push([remap[caller], name, attrs]); for (const [caller, callees] of rawCallees) { for (const {callee, any, all} of callees) { addToMappedList(d.rawCallees, remap[caller]|0, {callee:remap[callee], any, all}); } } } return d; } function loadCallgraph(files, verbose) { const { fieldCallAttrs, fieldCallCSU, gcCalls, indirectCalls, rawCallees, functions } = mergeRawCallgraphs(files, verbose); assert(ID.jscode == functions.mangledToId["(js-code)"]); assert(ID.anyfunc == functions.mangledToId["(any-function)"]); assert(ID.nogcfunc == functions.mangledToId["(nogc-function)"]); assert(ID.gc == functions.mangledToId["(GC)"]); addToMappedList(rawCallees, functions.mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0}); // Compute functionAttrs: it should contain the set of functions that // are *always* called within some sort of limited context (eg GC // suppression). // set of mangled names (map from mangled name => [any,all]) const functionAttrs = {}; // Initialize to field calls with attrs set. for (var [name, attrs] of Object.entries(fieldCallAttrs)) functionAttrs[name] = [attrs, attrs]; // map from ID => reason const gcFunctions = { [ID.gc]: 'internal' }; // Add in any extra functions at the end. (If we did this early, it would // mess up the id <-> name correspondence. Also, we need to know if the // functions even exist in the first place.) for (var func of extraGCFunctions(functions.readableName)) { addGCFunction(functions.mangledToId[func], "annotation", gcFunctions, functionAttrs, functions); } for (const func of gcCalls) addToMappedList(rawCallees, func, {callee:ID.gc, any:0, all:0}); for (const [caller, indirect, attrs] of indirectCalls) { const id = functions.name.length; functions.name.push(indirect); functions.mangledToId[indirect] = id; addToMappedList(rawCallees, caller, {callee:id, any:attrs, all:attrs}); addToMappedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0}); } // Callers have a list of callees, with duplicates (if the same function is // called more than once.) Merge the repeated calls, only keeping attrs // that are in force for *every* callsite of that callee. Also, generate // the callersOf table at the same time. // // calleesOf : map from mangled => {mangled callee => {'any':intset, 'all':intset}} // callersOf : map from mangled => {mangled caller => {'any':intset, 'all':intset}} const {callersOf, calleesOf} = generate_callgraph(rawCallees); // Compute functionAttrs: it should contain the set of functions that // are *always* called within some sort of limited context (eg GC // suppression). // Initialize to field calls with attrs set. for (var [name, attrs] of Object.entries(fieldCallAttrs)) functionAttrs[name] = [attrs, attrs]; // Initialize functionAttrs to the set of all functions, where each one is // maximally attributed, and return a worklist containing all simple roots // (nodes with no callers). const simple_roots = gather_simple_roots(functionAttrs, calleesOf, callersOf); // Traverse the graph, spreading the attrs down from the roots. propagate_attrs(simple_roots, functionAttrs, calleesOf); // There are a surprising number of "recursive roots", where there is a // cycle of functions calling each other but not called by anything else, // and these roots may also have descendants. Now that the above traversal // has eliminated everything reachable from simple roots, traverse the // remaining graph to gather up a representative function from each root // cycle. // // Simple example: in the JS shell build, moz_xstrdup calls itself, but // there are no calls to it from within js/src. const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions); // And do a final traversal starting with the recursive roots. propagate_attrs(recursive_roots, functionAttrs, calleesOf); for (const [f, [any, all]] of Object.entries(functionAttrs)) { // Throw out all functions with no attrs set, to reduce the size of the // output. From now on, "not in functionAttrs" means [any=0, all=0]. if (any == 0 && all == 0) delete functionAttrs[f]; // Remove GC-suppressed functions from the set of functions known to GC. // Also remove functions only reachable through calls that have been // replaced. if (all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) delete gcFunctions[name]; } // functionAttrs now contains all functions that are ever called in an // attributed context, based on the known callgraph (i.e., calls through // function pointers are not taken into consideration.) // Sanity check to make sure the callgraph has some functions annotated as // GC Calls. This is mostly a check to be sure the earlier processing // succeeded (as opposed to, say, running on empty xdb files because you // didn't actually compile anything interesting.) assert(gcCalls.length > 0, "No GC functions found!"); // Initialize the worklist to all known gcFunctions. const worklist = [ID.gc]; // Include all field calls (but not virtual method calls). for (const [name, csuName] of fieldCallCSU) { const fullFieldName = functions.name[name]; if (!fieldCallCannotGC(csuName, fullFieldName)) { gcFunctions[name] = 'arbitrary function pointer ' + fullFieldName; worklist.push(name); } } // Recursively find all callers not always called in a GC suppression // context, and add them to the set of gcFunctions. while (worklist.length) { name = worklist.shift(); assert(name in gcFunctions, "gcFunctions does not contain " + name); if (!callersOf.has(name)) continue; for (const [caller, {any, all}] of callersOf.get(name)) { if ((all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) == 0) { if (addGCFunction(caller, name, gcFunctions, functionAttrs, functions)) worklist.push(caller); } } } // Convert functionAttrs to limitedFunctions (using mangled names instead // of ids.) // set of mangled names (map from mangled name => {any,all,recursive_root:bool} var limitedFunctions = {}; for (const [id, [any, all]] of Object.entries(functionAttrs)) { if (all) { limitedFunctions[functions.name[id]] = { attributes: all }; } } for (const [id, limits, label] of recursive_roots) { const name = functions.name[id]; const s = limitedFunctions[name] || (limitedFunctions[name] = {}); s.recursive_root = true; } // Remap ids to mangled names. const namedGCFunctions = {}; for (const [caller, reason] of Object.entries(gcFunctions)) { namedGCFunctions[functions.name[caller]] = functions.name[reason] || reason; } return { gcFunctions: namedGCFunctions, functions, calleesOf, callersOf, limitedFunctions }; } function saveCallgraph(functions, calleesOf) { // Write out all the ids and their readable names. let id = -1; for (const name of functions.name) { id += 1; if (id == 0) continue; print(`#${id} ${name}`); for (const readable of (functions.readableName[name] || [])) { if (readable != name) print(`= ${id} ${readable}`); } } // Omit field calls for now; let them appear as if they were functions. const attrstring = range => range.any || range.all ? `${range.all}:${range.any} ` : ''; for (const [caller, callees] of calleesOf) { for (const [callee, attrs] of callees) { print(`D ${attrstring(attrs)}${caller} ${callee}`); } } // Omit tags for now. This really should preserve all tags. The "GC Call" // tag will already be represented in the graph by having an edge to the // "(GC)" node. } // Return a worklist of functions with no callers, and also initialize // functionAttrs to the set of all functions, each mapped to // [ATTRS_NONE, ATTRS_UNVISITED]. function gather_simple_roots(functionAttrs, calleesOf, callersOf) { const roots = []; for (const callee of callersOf.keys()) functionAttrs[callee] = [ATTRS_NONE, ATTRS_UNVISITED]; for (const caller of calleesOf.keys()) { functionAttrs[caller] = [ATTRS_NONE, ATTRS_UNVISITED]; if (!callersOf.has(caller)) roots.push([caller, ATTRS_NONE, 'root']); } return roots; } // Recursively traverse the callgraph from the roots. Recurse through every // edge that weakens the attrs. (Attrs that entirely disappear, ie go to a zero // intset, will be removed from functionAttrs.) function propagate_attrs(roots, functionAttrs, calleesOf) { const worklist = Array.from(roots); let top = worklist.length; while (top > 0) { // Consider caller where (graph) -> caller -> (0 or more callees) // 'callercaller' is for debugging. const [caller, edge_attrs, callercaller] = worklist[--top]; assert(caller in functionAttrs); const [prev_any, prev_all] = functionAttrs[caller]; assert(prev_any !== undefined); assert(prev_all !== undefined); const [new_any, new_all] = [prev_any | edge_attrs, prev_all & edge_attrs]; if (prev_any != new_any || prev_all != new_all) { // Update function attrs, then recurse to the children if anything // was updated. functionAttrs[caller] = [new_any, new_all]; for (const [callee, {any, all}] of (calleesOf.get(caller) || new Map)) worklist[top++] = [callee, all | edge_attrs, caller]; } } } // Mutually-recursive roots and their descendants will not have been visited, // and will still be set to [0, ATTRS_UNVISITED]. Scan through and gather them. function gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions) { const roots = []; // Pick any node. Mark everything reachable by adding to a 'seen' set. At // the end, if there are any incoming edges to that node from an unmarked // node, then it is not a root. Otherwise, mark the node as a root. (There // will be at least one back edge coming into the node from a marked node // in this case, since otherwise it would have already been considered to // be a root.) // // Repeat with remaining unmarked nodes until all nodes are marked. const seen = new Set(); for (let [func, [any, all]] of Object.entries(functionAttrs)) { func = func|0; if (all != ATTRS_UNVISITED) continue; // We should only be looking at nodes with callers, since otherwise // they would have been handled in the previous pass! assert(callersOf.has(func)); assert(callersOf.get(func).size > 0); if (seen.has(func)) continue; const work = [func]; while (work.length > 0) { const f = work.pop(); if (!calleesOf.has(f)) continue; for (const callee of calleesOf.get(f).keys()) { if (!seen.has(callee) && callee != func && functionAttrs[callee][1] == ATTRS_UNVISITED) { work.push(callee); seen.add(callee); } } } assert(!seen.has(func)); seen.add(func); if ([...callersOf.get(func).keys()].findIndex(f => !seen.has(f)) == -1) { // No unmarked incoming edges, including self-edges, so this is a // (recursive) root. roots.push([func, ATTRS_NONE, 'recursive-root']); } } return roots; tmp = calleesOf; calleesOf = {}; for (const [callerId, callees] of Object.entries(calleesOf)) { const caller = functionNames[callerId]; for (const {calleeId, limits} of callees) calleesOf[caller][functionNames[calleeId]] = limits; } tmp = callersOf; callersOf = {}; for (const [calleeId, callers] of Object.entries(callersOf)) { const callee = functionNames[calleeId]; callersOf[callee] = {}; for (const {callerId, limits} of callers) callersOf[callee][functionNames[caller]] = limits; } }