summaryrefslogtreecommitdiffstats
path: root/compilerplugins/clang/constantparam.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 16:51:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 16:51:28 +0000
commit940b4d1848e8c70ab7642901a68594e8016caffc (patch)
treeeb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /compilerplugins/clang/constantparam.py
parentInitial commit. (diff)
downloadlibreoffice-upstream.tar.xz
libreoffice-upstream.zip
Adding upstream version 1:7.0.4.upstream/1%7.0.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compilerplugins/clang/constantparam.py')
-rwxr-xr-xcompilerplugins/clang/constantparam.py192
1 files changed, 192 insertions, 0 deletions
diff --git a/compilerplugins/clang/constantparam.py b/compilerplugins/clang/constantparam.py
new file mode 100755
index 000000000..a2a820778
--- /dev/null
+++ b/compilerplugins/clang/constantparam.py
@@ -0,0 +1,192 @@
+#!/usr/bin/python2
+
+import sys
+import re
+import io
+
# Maps a callInfo tuple (returnType, nameAndParams, paramName, paramType,
# sourceLocation) to the set of distinct call values logged for that parameter.
callDict = {}
+
# clang does not always emit the same numbers in the "type-parameter-N-M"
# names it generates, so the numbers are replaced with a fixed placeholder
# to let otherwise identical signatures compare equal.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")


def normalizeTypeParams(line):
    """Return line with each "type-parameter-<n>-<m>" rewritten to "type-parameter-?-?"."""
    placeholder = "type-parameter-?-?"
    return re.sub(normalizeTypeParamsRegex, placeholder, line)
+
# The log is known to be pure ASCII.  The built-in open() in "r" mode yields
# native str lines on both Python 2 and Python 3, so the "\t" split below and
# all later str-based processing of the values work on either interpreter
# (a binary-mode line cannot be split on a str separator under Python 3).
with open("workdir/loplugin.constantparam.log", "r", buffering=1024*1024) as txt:
    for line in txt:
        # one record per line, tab separated:
        # returnType, nameAndParams, sourceLocation, paramName, paramType, callValue
        tokens = line.strip().split("\t")
        try:
            returnType = normalizeTypeParams(tokens[0])
            nameAndParams = normalizeTypeParams(tokens[1])
            sourceLocation = tokens[2]
            paramName = tokens[3]
            paramType = normalizeTypeParams(tokens[4])
            callValue = tokens[5]
        except IndexError:
            # a record with too few fields: show it, then abort with the original error
            print("problem with line " + line.strip())
            raise
        callInfo = (returnType, nameAndParams, paramName, paramType, sourceLocation)
        # collect every distinct value seen for this parameter
        if callInfo not in callDict:
            callDict[callInfo] = set()
        callDict[callInfo].add(callValue)
+
def RepresentsInt(s):
    """Return True when int(s) succeeds, False when it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
+
# Matches a call value that is a bare no-argument call such as "Foo()".
# Raw string: "\w" and "\(" are regex escapes, not Python string escapes.
constructor_regex = re.compile(r"^\w+\(\)$")

# source locations whose findings are not interesting
_ignoredSourcePrefixes = (
    # code that follows a common pattern
    "sw/inc/swatrset.hxx",
    "sw/inc/format.hxx",
    # template generated code
    "include/sax/fshelper.hxx",
    # debug code
    "include/oox/dump",
    # part of our binary API
    "include/LibreOfficeKit",
)

# methods generated by SFX macros
_sfxMacroSignatures = (
    "RegisterInterface(class SfxModule *)",
    "RegisterChildWindow(_Bool,class SfxModule *,enum SfxChildWindowFlags)",
    "RegisterControl(unsigned short,class SfxModule *)",
)

# Findings, each a tuple (sourceLoc, functionSig, "paramType paramName", callValue):
tmp1list = list()  # the single value is "0" or "1"
tmp2list = list()  # the single value is some other integer
tmp3list = list()  # the single value is a no-argument call or an empty string literal
tmp4list = list()  # everything else

# items() (rather than the Python-2-only iteritems()) works on Python 2 and 3
for callInfo, callValues in callDict.items():
    nameAndParams = callInfo[1]
    # only parameters that always receive exactly one distinct value are of interest
    if len(callValues) != 1:
        continue
    callValue = next(iter(callValues))
    if "unknown" in callValue:
        continue
    sourceLoc = callInfo[4]
    functionSig = callInfo[0] + " " + callInfo[1]

    # try to ignore setter methods
    if ("," not in nameAndParams) and (("::set" in nameAndParams) or ("::Set" in nameAndParams)):
        continue
    if sourceLoc.startswith(_ignoredSourcePrefixes):
        continue
    if any(signature in nameAndParams for signature in _sfxMacroSignatures):
        continue

    finding = (sourceLoc, functionSig, callInfo[3] + " " + callInfo[2], callValue)

    if RepresentsInt(callValue):
        if callValue == "0" or callValue == "1":
            tmp1list.append(finding)
        else:
            tmp2list.append(finding)
    # look for places where the callsite is always a constructor invocation
    elif constructor_regex.match(callValue) or callValue == "\"\"":
        if callValue.startswith(("Get", "get")):
            continue
        if "operator=" in functionSig:
            continue
        if "&&" in functionSig:
            continue
        if callInfo[2] == "###0" and callValue in ("InitData()", "InitAggregate()"):
            continue
        if callValue == "shared_from_this()":
            continue
        tmp3list.append(finding)
    else:
        tmp4list.append(finding)
+
+
+# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Build a sort key that orders the digit runs inside s numerically.

    s is split on runs of ASCII digits (the runs are kept); pieces for which
    str.isdigit() is true become ints, all other pieces are lower-cased.
    """
    key = []
    for piece in re.split(_nsre, s):
        if piece.isdigit():
            key.append(int(piece))
        else:
            key.append(piece.lower())
    return key
def _writeResults(path, results):
    """Write the findings in results to the file at path.

    Each finding is a tuple of strings.  Its first element (the source
    location) is written on a line of its own; every further element is
    written on a following line prefixed with a single space.
    """
    with open(path, "wt") as f:
        for v in results:
            f.write(v[0] + "\n")
            for detail in v[1:]:
                f.write(" " + detail + "\n")


# one output file per category of finding
_resultFiles = (
    ("compilerplugins/clang/constantparam.booleans.results", tmp1list),
    ("compilerplugins/clang/constantparam.numbers.results", tmp2list),
    ("compilerplugins/clang/constantparam.constructors.results", tmp3list),
    ("compilerplugins/clang/constantparam.others.results", tmp4list),
)
for _path, _results in _resultFiles:
    # order by source location (filename:lineno), then print out the results
    _results.sort(key=lambda v: natural_sort_key(v[0]))
    _writeResults(_path, _results)
+
+# -------------------------------------------------------------
+# Now a fun set of heuristics to look for methods that
+# take bitmask parameters where one or more of the bits in the
+# bitmask is always one or always zero
+
# integer to hex str
def hex(i):
    """Return integer i formatted as lowercase hexadecimal with a "0x" prefix.

    Note: this definition shadows the built-in hex() for the rest of the module.
    """
    return "0x{:x}".format(i)
# Python's ~ operator produces negative numbers, so the complement is taken
# against a fixed 32-bit all-ones mask instead.
def negate(i):
    """Return 0xFFFFFFFF - i, i.e. i with its low 32 bits flipped for 0 <= i < 2**32."""
    all_ones = 0xFFFFFFFF
    return all_ones - i
+
# Findings for bitmask parameters, each a tuple
# (sourceLoc, functionSig, "paramType paramName [setBits=..] [clearBits=..]")
tmp2list = list()
# items() (rather than the Python-2-only iteritems()) works on Python 2 and 3
for callInfo, callValues in callDict.items():
    nameAndParams = callInfo[1]
    # a single value is already covered by the constant-parameter results
    if len(callValues) < 2:
        continue
    # we are only interested in enum parameters
    if "enum" not in callInfo[3]:
        continue
    # ... whose type name suggests a bitmask
    if not any(hint in callInfo[3] for hint in ("Flag", "flag", "Bit", "State")):
        continue
    # try to ignore setter methods
    if ("," not in nameAndParams) and (("::set" in nameAndParams) or ("::Set" in nameAndParams)):
        continue

    # any value that is not a plain number (including "unknown") makes the
    # parameter impossible to analyse
    if not all(callValue.isdigit() for callValue in callValues):
        continue
    values = [int(callValue) for callValue in callValues]

    setBits = values[0]            # bits that are one in every value
    clearBits = negate(values[0])  # bits that are zero in every value
    seenBits = values[0]           # bits that are one in at least one value
    for value in values[1:]:
        setBits &= value
        clearBits &= negate(value)
        seenBits |= value

    # estimate allBits by using the highest bit we have seen in any value
    # (the always-set bits alone would under-estimate it and hide never-set
    # bits that lie above the highest always-set bit)
    # TODO dump more precise information about the allBits values of enums
    allBits = (1 << seenBits.bit_length()) - 1
    clearBits = clearBits & allBits
    if setBits == 0 and clearBits == 0:
        continue

    sourceLoc = callInfo[4]
    functionSig = callInfo[0] + " " + callInfo[1]

    v2 = callInfo[3] + " " + callInfo[2]
    if setBits != 0:
        v2 += " setBits=" + hex(setBits)
    if clearBits != 0:
        v2 += " clearBits=" + hex(clearBits)
    tmp2list.append((sourceLoc, functionSig, v2))
+
+
# order the bitmask findings by source location (filename:lineno)
tmp2list.sort(key=lambda finding: natural_sort_key(finding[0]))

# write each finding as three lines: the location, then the function
# signature and the parameter details, each prefixed with a single space
with open("compilerplugins/clang/constantparam.bitmask.results", "wt") as f:
    for v in tmp2list:
        f.write(v[0] + "\n" + " " + v[1] + "\n" + " " + v[2] + "\n")