summaryrefslogtreecommitdiffstats
path: root/bin/find-can-be-private-symbols.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 16:51:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 16:51:28 +0000
commit940b4d1848e8c70ab7642901a68594e8016caffc (patch)
treeeb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /bin/find-can-be-private-symbols.py
parentInitial commit. (diff)
downloadlibreoffice-upstream.tar.xz
libreoffice-upstream.zip
Adding upstream version 1:7.0.4.upstream/1%7.0.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'bin/find-can-be-private-symbols.py')
-rwxr-xr-xbin/find-can-be-private-symbols.py226
1 files changed, 226 insertions, 0 deletions
diff --git a/bin/find-can-be-private-symbols.py b/bin/find-can-be-private-symbols.py
new file mode 100755
index 000000000..f5ff83fd1
--- /dev/null
+++ b/bin/find-can-be-private-symbols.py
@@ -0,0 +1,226 @@
+#!/usr/bin/python2
+#
+# Find exported symbols that can be made non-exported.
+#
+# Noting that (a) parsing these commands is a pain, the output is quite irregular and (b) I'm fumbling in the
+# dark here, trying to guess what exactly constitutes an "import" vs an "export" of a symbol, linux linking
+# is rather complex.
+#
+# Takes about 5min to run on a decent machine.
+#
+# The standalone function analysis is reasonable reliable, but the class/method analysis is less so
+# (something to do with destructor thunks not showing up in my results?)
+#
+# Also, the class/method analysis will not catch problems like
+# 'dynamic_cast from 'Foo' with hidden type visibility to 'Bar' with default type visibility'
+# but loplugin:dyncastvisibility will do that for you
+#
+
+import subprocess
+import sys
+import re
+
+exported_symbols = set()
+imported_symbols = set()
+# standalone functions that are exported but not imported
+unused_function_exports = set()
+classes_with_exported_symbols = set()
+classes_with_imported_symbols = set()
+# all names that exist in the source code
+all_source_names = set()
+
+
+# look for imported symbols in executables
+subprocess_find_all_source_names = subprocess.Popen("git grep -oh -P '\\b\\w\\w\\w+\\b' -- '*.h*'", stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
+with subprocess_find_all_source_names.stdout as txt:
+ for line in txt:
+ line = line.strip()
+ all_source_names.add(line)
+subprocess_find_all_source_names.terminate()
+
+subprocess_find = subprocess.Popen("find ./instdir -name *.so && find ./workdir/LinkTarget/CppunitTest -name *.so", stdout=subprocess.PIPE, shell=True)
+with subprocess_find.stdout as txt:
+ for line in txt:
+ sharedlib = line.strip()
+ # look for exported symbols
+ subprocess_nm = subprocess.Popen("nm -D " + sharedlib, stdout=subprocess.PIPE, shell=True)
+ with subprocess_nm.stdout as txt2:
+ # We are looking for lines something like:
+ # 0000000000036ed0 T flash_component_getFactory
+ line_regex = re.compile(r'^[0-9a-fA-F]+ T ')
+ for line2 in txt2:
+ line2 = line2.strip()
+ if line_regex.match(line2):
+ exported_symbols.add(line2.split(" ")[2])
+ # look for imported symbols
+ subprocess_objdump = subprocess.Popen("objdump -T " + sharedlib, stdout=subprocess.PIPE, shell=True)
+ with subprocess_objdump.stdout as txt2:
+ # ignore some header bumpf
+ txt2.readline()
+ txt2.readline()
+ txt2.readline()
+ txt2.readline()
+ # We are looking for lines something like:
+ # 0000000000000000 DF *UND* 0000000000000000 _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
+ for line2 in txt2:
+ line2 = line2.strip()
+ tokens = line2.split(" ")
+ if len(tokens) < 7 or not(tokens[7].startswith("*UND*")): continue
+ sym = tokens[len(tokens)-1]
+ imported_symbols.add(sym)
+subprocess_find.terminate()
+
+# look for imported symbols in executables
+subprocess_find = subprocess.Popen("find ./instdir -name *.bin", stdout=subprocess.PIPE, shell=True)
+with subprocess_find.stdout as txt:
+ for line in txt:
+ executable = line.strip()
+ # look for exported symbols
+ subprocess_nm = subprocess.Popen("nm -D " + executable + " | grep -w U", stdout=subprocess.PIPE, shell=True)
+ with subprocess_nm.stdout as txt2:
+ # We are looking for lines something like:
+ # U sal_detail_deinitialize
+ for line2 in txt2:
+ line2 = line2.strip()
+ sym = line2.split(" ")[1]
+ imported_symbols.add(sym)
+subprocess_find.terminate()
+
+diff = exported_symbols - imported_symbols
+print("exported = " + str(len(exported_symbols)))
+print("imported = " + str(len(imported_symbols)))
+print("diff = " + str(len(diff)))
+
+for sym in exported_symbols:
+ filtered_sym = subprocess.check_output(["c++filt", sym]).strip()
+ if filtered_sym.startswith("non-virtual thunk to "): filtered_sym = filtered_sym[21:]
+ elif filtered_sym.startswith("virtual thunk to "): filtered_sym = filtered_sym[17:]
+ i = filtered_sym.find("(")
+ i = filtered_sym.rfind("::", 0, i)
+ if i != -1:
+ classname = filtered_sym[:i]
+ # find classes where all of the exported symbols are not imported
+ classes_with_exported_symbols.add(classname)
+ else:
+ func = filtered_sym
+ # find standalone functions which are exported but not imported
+ if not(sym in imported_symbols): unused_function_exports.add(func)
+
+for sym in imported_symbols:
+ filtered_sym = subprocess.check_output(["c++filt", sym]).strip()
+ if filtered_sym.startswith("non-virtual thunk to "): filtered_sym = filtered_sym[21:]
+ elif filtered_sym.startswith("virtual thunk to "): filtered_sym = filtered_sym[17:]
+ i = filtered_sym.find("(")
+ i = filtered_sym.rfind("::", 0, i)
+ if i != -1:
+ classname = filtered_sym[:i]
+ classes_with_imported_symbols.add(classname)
+
+def extractFunctionNameFromSignature(sym):
+ i = sym.find("(")
+ if i == -1: return sym
+ return sym[:i]
+
+with open("bin/find-can-be-private-symbols.functions.results", "wt") as f:
+ for sym in sorted(unused_function_exports):
+ # Filter out most of the noise.
+ # No idea where these are coming from, but not our code.
+ if sym.startswith("CERT_"): continue
+ elif sym.startswith("DER_"): continue
+ elif sym.startswith("FORM_"): continue
+ elif sym.startswith("FPDF"): continue
+ elif sym.startswith("HASH_"): continue
+ elif sym.startswith("Hunspell_"): continue
+ elif sym.startswith("LL_"): continue
+ elif sym.startswith("LP_"): continue
+ elif sym.startswith("LU"): continue
+ elif sym.startswith("MIP"): continue
+ elif sym.startswith("MPS"): continue
+ elif sym.startswith("NSS"): continue
+ elif sym.startswith("NSC_"): continue
+ elif sym.startswith("PK11"): continue
+ elif sym.startswith("PL_"): continue
+ elif sym.startswith("PQ"): continue
+ elif sym.startswith("PBE_"): continue
+ elif sym.startswith("PORT_"): continue
+ elif sym.startswith("PRP_"): continue
+ elif sym.startswith("PR_"): continue
+ elif sym.startswith("PT_"): continue
+ elif sym.startswith("QS_"): continue
+ elif sym.startswith("REPORT_"): continue
+ elif sym.startswith("RSA_"): continue
+ elif sym.startswith("SEC"): continue
+ elif sym.startswith("SGN"): continue
+ elif sym.startswith("SOS"): continue
+ elif sym.startswith("SSL_"): continue
+ elif sym.startswith("VFY_"): continue
+ elif sym.startswith("_PR_"): continue
+ elif sym.startswith("_"): continue
+ elif sym.startswith("ber_"): continue
+ elif sym.startswith("bfp_"): continue
+ elif sym.startswith("ldap_"): continue
+ elif sym.startswith("ne_"): continue
+ elif sym.startswith("opj_"): continue
+ elif sym.startswith("pg_"): continue
+ elif sym.startswith("pq"): continue
+ elif sym.startswith("presolve_"): continue
+ elif sym.startswith("sqlite3_"): continue
+ # dynamically loaded
+ elif sym.endswith("get_implementation"): continue
+ elif sym.endswith("component_getFactory"): continue
+ elif sym == "CreateDialogFactory": continue
+ elif sym == "CreateUnoWrapper": continue
+ elif sym == "CreateWindow": continue
+ elif sym == "ExportDOC": continue
+ elif sym == "ExportPPT": continue
+ elif sym == "ExportRTF": continue
+ elif sym == "GetSaveWarningOfMSVBAStorage_ww8": continue
+ elif sym == "GetSpecialCharsForEdit": continue
+ elif sym.startswith("Import"): continue
+ elif sym.startswith("Java_com_sun_star_"): continue
+ elif sym.startswith("TestImport"): continue
+ elif sym.startswith("getAllCalendars_"): continue
+ elif sym.startswith("getAllCurrencies_"): continue
+ elif sym.startswith("getAllFormats"): continue
+ elif sym.startswith("getBreakIteratorRules_"): continue
+ elif sym.startswith("getCollationOptions_"): continue
+ elif sym.startswith("getCollatorImplementation_"): continue
+ elif sym.startswith("getContinuousNumberingLevels_"): continue
+ elif sym.startswith("getDateAcceptancePatterns_"): continue
+ elif sym.startswith("getForbiddenCharacters_"): continue
+ elif sym.startswith("getIndexAlgorithm_"): continue
+ elif sym.startswith("getLCInfo_"): continue
+ elif sym.startswith("getLocaleItem_"): continue
+ elif sym.startswith("getOutlineNumberingLevels_"): continue
+ elif sym.startswith("getReservedWords_"): continue
+ elif sym.startswith("getSTC_"): continue
+ elif sym.startswith("getSearchOptions_"): continue
+ elif sym.startswith("getTransliterations_"): continue
+ elif sym.startswith("getUnicodeScripts_"): continue
+ elif sym.startswith("lok_"): continue
+ # UDK API
+ elif sym.startswith("osl_"): continue
+ elif sym.startswith("rtl_"): continue
+ elif sym.startswith("typelib_"): continue
+ elif sym.startswith("typereg_"): continue
+ elif sym.startswith("uno_"): continue
+ # remove things we found that do not exist in our source code, they're not ours
+ if not(extractFunctionNameFromSignature(sym) in all_source_names): continue
+ f.write(sym + "\n")
+
+with open("bin/find-can-be-private-symbols.classes.results", "wt") as f:
+ for sym in sorted(classes_with_exported_symbols - classes_with_imported_symbols):
+ # externals
+ if sym.startswith("libcdr"): continue
+ elif sym.startswith("libabw"): continue
+ elif sym.startswith("libebook"): continue
+ elif sym.startswith("libepubgen"): continue
+ elif sym.startswith("libfreehand"): continue
+ elif sym.startswith("libmspub"): continue
+ elif sym.startswith("libpagemaker"): continue
+ elif sym.startswith("libqxp"): continue
+ elif sym.startswith("libvisio"): continue
+ elif sym.startswith("libzmf"): continue
+ elif sym.startswith("lucene::"): continue
+ elif sym.startswith("Sk"): continue
+ f.write(sym + "\n")