diff options
Diffstat (limited to '')
-rwxr-xr-x | compilerplugins/clang/methodcycles.py | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/compilerplugins/clang/methodcycles.py b/compilerplugins/clang/methodcycles.py new file mode 100755 index 0000000000..5a1b731cc9 --- /dev/null +++ b/compilerplugins/clang/methodcycles.py @@ -0,0 +1,265 @@ +#!/usr/bin/python3 + +from collections import defaultdict +import io +import re +import subprocess + +# -------------------------------------------------------------------------------------------- +# globals +# -------------------------------------------------------------------------------------------- + +definitionSet = set() # set of method_name +definitionToSourceLocationMap = dict() + +# for the "unused methods" analysis +callDict = defaultdict(set) # map of from_method_name -> set(method_name) + +# clang does not always use exactly the same numbers in the type-parameter vars it generates +# so I need to substitute them to ensure we can match correctly. +normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+") +def normalizeTypeParams( line ): + line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line) + # make some of the types a little prettier + line = line.replace("std::__debug", "std::") + line = line.replace("class ", "") + line = line.replace("struct ", "") + line = line.replace("_Bool", "bool") + return line + +# -------------------------------------------------------------------------------------------- +# primary input loop +# -------------------------------------------------------------------------------------------- + +cnt = 0 +with io.open("workdir/loplugin.methodcycles.log", "r", buffering=1024*1024) as txt: + for line in txt: + tokens = line.strip().split("\t") + if tokens[0] == "definition:": + returnType = tokens[1] + nameAndParams = tokens[2] + sourceLocation = tokens[3] + funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip() + definitionSet.add(funcInfo) + definitionToSourceLocationMap[funcInfo] = sourceLocation + elif tokens[0] == "call:": + returnTypeFrom = tokens[1] + nameAndParamsFrom = tokens[2] + returnTypeTo = tokens[3] + nameAndParamsTo = tokens[4] + caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip() + callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip() + callDict[caller].add(callee) + else: + print( "unknown line: " + line) + cnt = cnt + 1 + #if cnt > 100000: break + +# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely +def natural_sort_key(s, _nsre=re.compile('([0-9]+)')): + return [int(text) if text.isdigit() else text.lower() + for text in re.split(_nsre, s)] +# sort by both the source-line and the datatype, so the output file ordering is stable +# when we have multiple items on the same source line +def v_sort_key(v): + return natural_sort_key(v[1]) + [v[0]] +def sort_set_by_natural_key(s): + return sorted(s, key=lambda v: v_sort_key(v)) + + +# -------------------------------------------------------------------------------------------- +# analysis +# -------------------------------------------------------------------------------------------- + +# follow caller-callee chains, removing all methods reachable from a root method +def remove_reachable(callDict, startCaller): + worklist = list() + worklist.append(startCaller) + while len(worklist) > 0: + caller = worklist.pop() + if not caller in callDict: + continue + calleeSet = callDict[caller] + del callDict[caller] + if caller in definitionSet: + definitionSet.remove(caller) + for c in calleeSet: + worklist.append(c) + +# look for all the external entry points and remove code called from there +to_be_removed = set() +to_be_removed.add("int main(int,char **)") +# random dynload entrypoints that we don't otherwise find +to_be_removed.add("bool TestImportOLE2(SvStream &)") +to_be_removed.add("void SbiRuntime::StepREDIMP()") +to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)"); +for caller in definitionSet: + if not caller in definitionToSourceLocationMap: + to_be_removed.append(caller) + continue + location = definitionToSourceLocationMap[caller] + if "include/com/" in location \ + or "include/cppu/" in location \ + or "include/cppuhelper/" in location \ + or "include/osl/" in location \ + or "include/rtl/" in location \ + or "include/sal/" in location \ + or "include/salhelper/" in location \ + or "include/typelib/" in location \ + or "include/uno/" in location \ + or "workdir/UnpackedTarball/" in location \ + or "workdir/UnoApiHeadersTarget/" in location \ + or "workdir/CustomTarget/officecfg/" in location \ + or "workdir/LexTarget/" in location \ + or "workdir/CustomTarget/i18npool/localedata/" in location \ + or "workdir/SdiTarget/" in location \ + or "/qa/" in location \ + or "include/test/" in location: + to_be_removed.add(caller) + # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called, + # which is not ideal + if "::~" in caller: + to_be_removed.add(caller) + # dyload entry points for VCL builder + if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller: + to_be_removed.add(caller) + if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller: + to_be_removed.add(caller) +# find all the UNO load-by-symbol-name entrypoints +uno_constructor_entrypoints = set() +git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True) +with git_grep_process.stdout as txt: + for line in txt: + idx1 = line.find(b"\"") + idx2 = line.find(b"\"", idx1 + 1) + func = line[idx1+1 : idx2] + uno_constructor_entrypoints.add(func.decode('utf-8')) +for caller in callDict: + if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller: + for func in uno_constructor_entrypoints: + if func in caller: + to_be_removed.add(caller) +# remove everything reachable from the found entry points +for caller in to_be_removed: + remove_reachable(callDict, caller) +for caller in callDict: + callDict[caller] -= to_be_removed + +# create a reverse call graph +inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name) +for caller in callDict: + for callee in callDict[caller]: + inverseCallDict[callee].add(caller) + +print_tree_recurse_set = set() # protect against cycles +def print_tree(f, callDict, caller, depth): + if depth == 0: + f.write("\n") # add an empty line before each tree + print_tree_recurse_set.clear() + # protect against cycles + if caller in print_tree_recurse_set: + return + # when printing out trees, things that are not in the map are things that are reachable, + # so we're not interested in them + if not caller in callDict: + return + print_tree_recurse_set.add(caller) + f.write(" " * depth + caller + "\n") + f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n") + calleeSet = callDict[caller] + for c in calleeSet: + print_tree(f, callDict, c, depth+1) + +# find possible roots (ie. entrypoints) by looking for methods that are not called +def dump_possible_roots(): + possibleRootList = list() + for caller in callDict: + if not caller in inverseCallDict and caller in definitionToSourceLocationMap: + possibleRootList.append(caller) + possibleRootList.sort() + + # print out first 100 trees of caller->callees + count = 0 + with open("compilerplugins/clang/methodcycles.roots", "wt") as f: + f.write("callDict size " + str(len(callDict)) + "\n") + f.write("possibleRootList size " + str(len(possibleRootList)) + "\n") + f.write("\n") + for caller in possibleRootList: + f.write(caller + "\n") + f.write(" " + definitionToSourceLocationMap[caller] + "\n") + #print_tree(f, caller, 0) + count = count + 1 + #if count>1000: break + +# Look for cycles in a directed graph +# Adapted from: +# https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle +def print_cycles(): + with open("compilerplugins/clang/methodcycles.results", "wt") as f: + path = set() + visited = set() + + def printPath(path): + if len(path) < 2: + return + # we may have found a cycle, but if the cycle is called from outside the cycle + # the code is still in use. + for p in path: + for caller in inverseCallDict[p]: + if not caller in path: + return + f.write("found cycle\n") + for p in path: + f.write(" " + p + "\n") + f.write(" " + definitionToSourceLocationMap[p] + "\n") + f.write("\n") + + def checkCyclic(vertex): + if vertex in visited: + return + visited.add(vertex) + path.add(vertex) + if vertex in callDict: + for neighbour in callDict[vertex]: + if neighbour in path: + printPath(path) + break + else: + checkCyclic(neighbour) + path.remove(vertex) + + for caller in callDict: + checkCyclic(caller) + +print_cycles() + +# print partitioned sub-graphs +def print_partitions(): + callDict2 = callDict + # Remove anything with no callees, and that is itself not called. + # After this stage, we should only be left with closed sub-graphs ie. partitions + while True: + to_be_removed.clear() + for caller in callDict2: + if len(callDict2[caller]) == 0 \ + or not caller in inverseCallDict[caller]: + to_be_removed.add(caller) + if len(to_be_removed) == 0: + break + for caller in to_be_removed: + remove_reachable(callDict2, caller) + for caller in callDict2: + callDict2[caller] -= to_be_removed + + count = 0 + with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f: + f.write("callDict size " + str(len(callDict2)) + "\n") + f.write("\n") + while len(callDict2) > 0: + print_tree(f, callDict2, next(iter(callDict2)), 0) + for c in print_tree_recurse_set: + callDict2.pop(c, None) + count = count + 1 + if count>1000: break + +print_partitions() |