summaryrefslogtreecommitdiffstats
path: root/compilerplugins/clang/methodcycles.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:54:39 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:54:39 +0000
commit267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree358c9467650e1d0a1d7227a21dac2e3d08b622b2 /compilerplugins/clang/methodcycles.py
parentInitial commit. (diff)
downloadlibreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rwxr-xr-xcompilerplugins/clang/methodcycles.py265
1 files changed, 265 insertions, 0 deletions
diff --git a/compilerplugins/clang/methodcycles.py b/compilerplugins/clang/methodcycles.py
new file mode 100755
index 0000000000..5a1b731cc9
--- /dev/null
+++ b/compilerplugins/clang/methodcycles.py
@@ -0,0 +1,265 @@
+#!/usr/bin/python3
+
+from collections import defaultdict
+import io
+import re
+import subprocess
+
+# --------------------------------------------------------------------------------------------
+# globals
+# --------------------------------------------------------------------------------------------
+
+definitionSet = set() # set of method_name
+definitionToSourceLocationMap = dict()
+
+# for the "unused methods" analysis
+callDict = defaultdict(set) # map of from_method_name -> set(method_name)
+
+# clang does not always use exactly the same numbers in the type-parameter vars it generates
+# so I need to substitute them to ensure we can match correctly.
+normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
+def normalizeTypeParams( line ):
+ line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
+ # make some of the types a little prettier
+ line = line.replace("std::__debug", "std::")
+ line = line.replace("class ", "")
+ line = line.replace("struct ", "")
+ line = line.replace("_Bool", "bool")
+ return line
+
+# --------------------------------------------------------------------------------------------
+# primary input loop
+# --------------------------------------------------------------------------------------------
+
+cnt = 0
+with io.open("workdir/loplugin.methodcycles.log", "r", buffering=1024*1024) as txt:
+ for line in txt:
+ tokens = line.strip().split("\t")
+ if tokens[0] == "definition:":
+ returnType = tokens[1]
+ nameAndParams = tokens[2]
+ sourceLocation = tokens[3]
+ funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip()
+ definitionSet.add(funcInfo)
+ definitionToSourceLocationMap[funcInfo] = sourceLocation
+ elif tokens[0] == "call:":
+ returnTypeFrom = tokens[1]
+ nameAndParamsFrom = tokens[2]
+ returnTypeTo = tokens[3]
+ nameAndParamsTo = tokens[4]
+ caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip()
+ callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip()
+ callDict[caller].add(callee)
+ else:
+ print( "unknown line: " + line)
+ cnt = cnt + 1
+ #if cnt > 100000: break
+
+# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
+def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
+ return [int(text) if text.isdigit() else text.lower()
+ for text in re.split(_nsre, s)]
+# sort by both the source-line and the datatype, so the output file ordering is stable
+# when we have multiple items on the same source line
+def v_sort_key(v):
+ return natural_sort_key(v[1]) + [v[0]]
+def sort_set_by_natural_key(s):
+ return sorted(s, key=lambda v: v_sort_key(v))
+
+
+# --------------------------------------------------------------------------------------------
+# analysis
+# --------------------------------------------------------------------------------------------
+
+# follow caller-callee chains, removing all methods reachable from a root method
+def remove_reachable(callDict, startCaller):
+ worklist = list()
+ worklist.append(startCaller)
+ while len(worklist) > 0:
+ caller = worklist.pop()
+ if not caller in callDict:
+ continue
+ calleeSet = callDict[caller]
+ del callDict[caller]
+ if caller in definitionSet:
+ definitionSet.remove(caller)
+ for c in calleeSet:
+ worklist.append(c)
+
+# look for all the external entry points and remove code called from there
+to_be_removed = set()
+to_be_removed.add("int main(int,char **)")
+# random dynload entrypoints that we don't otherwise find
+to_be_removed.add("bool TestImportOLE2(SvStream &)")
+to_be_removed.add("void SbiRuntime::StepREDIMP()")
+to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)");
+for caller in definitionSet:
+ if not caller in definitionToSourceLocationMap:
+ to_be_removed.append(caller)
+ continue
+ location = definitionToSourceLocationMap[caller]
+ if "include/com/" in location \
+ or "include/cppu/" in location \
+ or "include/cppuhelper/" in location \
+ or "include/osl/" in location \
+ or "include/rtl/" in location \
+ or "include/sal/" in location \
+ or "include/salhelper/" in location \
+ or "include/typelib/" in location \
+ or "include/uno/" in location \
+ or "workdir/UnpackedTarball/" in location \
+ or "workdir/UnoApiHeadersTarget/" in location \
+ or "workdir/CustomTarget/officecfg/" in location \
+ or "workdir/LexTarget/" in location \
+ or "workdir/CustomTarget/i18npool/localedata/" in location \
+ or "workdir/SdiTarget/" in location \
+ or "/qa/" in location \
+ or "include/test/" in location:
+ to_be_removed.add(caller)
+ # TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called,
+ # which is not ideal
+ if "::~" in caller:
+ to_be_removed.add(caller)
+ # dyload entry points for VCL builder
+ if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller:
+ to_be_removed.add(caller)
+ if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller:
+ to_be_removed.add(caller)
+# find all the UNO load-by-symbol-name entrypoints
+uno_constructor_entrypoints = set()
+git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True)
+with git_grep_process.stdout as txt:
+ for line in txt:
+ idx1 = line.find(b"\"")
+ idx2 = line.find(b"\"", idx1 + 1)
+ func = line[idx1+1 : idx2]
+ uno_constructor_entrypoints.add(func.decode('utf-8'))
+for caller in callDict:
+ if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller:
+ for func in uno_constructor_entrypoints:
+ if func in caller:
+ to_be_removed.add(caller)
+# remove everything reachable from the found entry points
+for caller in to_be_removed:
+ remove_reachable(callDict, caller)
+for caller in callDict:
+ callDict[caller] -= to_be_removed
+
+# create a reverse call graph
+inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name)
+for caller in callDict:
+ for callee in callDict[caller]:
+ inverseCallDict[callee].add(caller)
+
+print_tree_recurse_set = set() # protect against cycles
+def print_tree(f, callDict, caller, depth):
+ if depth == 0:
+ f.write("\n") # add an empty line before each tree
+ print_tree_recurse_set.clear()
+ # protect against cycles
+ if caller in print_tree_recurse_set:
+ return
+ # when printing out trees, things that are not in the map are things that are reachable,
+ # so we're not interested in them
+ if not caller in callDict:
+ return
+ print_tree_recurse_set.add(caller)
+ f.write(" " * depth + caller + "\n")
+ f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n")
+ calleeSet = callDict[caller]
+ for c in calleeSet:
+ print_tree(f, callDict, c, depth+1)
+
+# find possible roots (ie. entrypoints) by looking for methods that are not called
+def dump_possible_roots():
+ possibleRootList = list()
+ for caller in callDict:
+ if not caller in inverseCallDict and caller in definitionToSourceLocationMap:
+ possibleRootList.append(caller)
+ possibleRootList.sort()
+
+ # print out first 100 trees of caller->callees
+ count = 0
+ with open("compilerplugins/clang/methodcycles.roots", "wt") as f:
+ f.write("callDict size " + str(len(callDict)) + "\n")
+ f.write("possibleRootList size " + str(len(possibleRootList)) + "\n")
+ f.write("\n")
+ for caller in possibleRootList:
+ f.write(caller + "\n")
+ f.write(" " + definitionToSourceLocationMap[caller] + "\n")
+ #print_tree(f, caller, 0)
+ count = count + 1
+ #if count>1000: break
+
+# Look for cycles in a directed graph
+# Adapted from:
+# https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle
+def print_cycles():
+ with open("compilerplugins/clang/methodcycles.results", "wt") as f:
+ path = set()
+ visited = set()
+
+ def printPath(path):
+ if len(path) < 2:
+ return
+ # we may have found a cycle, but if the cycle is called from outside the cycle
+ # the code is still in use.
+ for p in path:
+ for caller in inverseCallDict[p]:
+ if not caller in path:
+ return
+ f.write("found cycle\n")
+ for p in path:
+ f.write(" " + p + "\n")
+ f.write(" " + definitionToSourceLocationMap[p] + "\n")
+ f.write("\n")
+
+ def checkCyclic(vertex):
+ if vertex in visited:
+ return
+ visited.add(vertex)
+ path.add(vertex)
+ if vertex in callDict:
+ for neighbour in callDict[vertex]:
+ if neighbour in path:
+ printPath(path)
+ break
+ else:
+ checkCyclic(neighbour)
+ path.remove(vertex)
+
+ for caller in callDict:
+ checkCyclic(caller)
+
+print_cycles()
+
+# print partitioned sub-graphs
+def print_partitions():
+ callDict2 = callDict
+ # Remove anything with no callees, and that is itself not called.
+ # After this stage, we should only be left with closed sub-graphs ie. partitions
+ while True:
+ to_be_removed.clear()
+ for caller in callDict2:
+ if len(callDict2[caller]) == 0 \
+ or not caller in inverseCallDict[caller]:
+ to_be_removed.add(caller)
+ if len(to_be_removed) == 0:
+ break
+ for caller in to_be_removed:
+ remove_reachable(callDict2, caller)
+ for caller in callDict2:
+ callDict2[caller] -= to_be_removed
+
+ count = 0
+ with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f:
+ f.write("callDict size " + str(len(callDict2)) + "\n")
+ f.write("\n")
+ while len(callDict2) > 0:
+ print_tree(f, callDict2, next(iter(callDict2)), 0)
+ for c in print_tree_recurse_set:
+ callDict2.pop(c, None)
+ count = count + 1
+ if count>1000: break
+
+print_partitions()