summaryrefslogtreecommitdiffstats
path: root/config/check_vanilla_allocations.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
commit0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
treea31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /config/check_vanilla_allocations.py
parentInitial commit. (diff)
downloadfirefox-esr-upstream/115.8.0esr.tar.xz
firefox-esr-upstream/115.8.0esr.zip
Adding upstream version 115.8.0esr.upstream/115.8.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'config/check_vanilla_allocations.py')
-rw-r--r--config/check_vanilla_allocations.py292
1 files changed, 292 insertions, 0 deletions
diff --git a/config/check_vanilla_allocations.py b/config/check_vanilla_allocations.py
new file mode 100644
index 0000000000..10bacd4fd3
--- /dev/null
+++ b/config/check_vanilla_allocations.py
@@ -0,0 +1,292 @@
+# vim: set ts=8 sts=4 et sw=4 tw=79:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# ----------------------------------------------------------------------------
+# All heap allocations in SpiderMonkey must go through js_malloc, js_calloc,
+# js_realloc, and js_free. This is so that any embedder who uses a custom
+# allocator (by defining JS_USE_CUSTOM_ALLOCATOR) will see all heap allocation
+# go through that custom allocator.
+#
+# Therefore, the presence of any calls to "vanilla" allocation/free functions
+# from within SpiderMonkey itself (e.g. malloc(), free()) is a bug. Calls from
+# within mozglue and non-SpiderMonkey locations are fine; there is a list of
+# exceptions that can be added to as the need arises.
+#
+# This script checks for the presence of such disallowed vanilla
+# allocation/free functions in SpiderMonkey when it's built as a library. It
+# relies on |nm| from the GNU binutils, and so only works on Linux, but one
+# platform is good enough to catch almost all violations.
+#
+# This checking is only 100% reliable in a JS_USE_CUSTOM_ALLOCATOR build in
+# which the default definitions of js_malloc et al (in Utility.h) -- which call
+# malloc et al -- are replaced with empty definitions. This is because the
+# presence and possible inlining of the default js_malloc et al can cause
+# malloc/calloc/realloc/free calls to show up in unpredictable places.
+#
+# Unfortunately, that configuration cannot be tested on Mozilla's standard
+# testing infrastructure. Instead, by default this script only tests that none
+# of the other vanilla allocation/free functions (operator new, memalign, etc)
+# are present. If given the --aggressive flag, it will also check for
+# malloc/calloc/realloc/free.
+#
+# Note: We don't check for |operator delete| and |operator delete[]|. These
+# can be present somehow due to virtual destructors, but this is not too bad
+# because vanilla delete/delete[] calls don't make sense without corresponding
+# vanilla new/new[] calls, and any explicit calls will be caught by Valgrind's
+# mismatched alloc/free checking.
+# ----------------------------------------------------------------------------
+
+import argparse
+import re
+import subprocess
+import sys
+from collections import defaultdict
+
+import buildconfig
+
+# The obvious way to implement this script is to search for occurrences of
+# malloc et al, succeed if none are found, and fail if some are found.
+# However, "none are found" does not necessarily mean "none are present" --
+# this script could be buggy. (Or the output format of |nm| might change in
+# the future.)
+#
+# So util/Utility.cpp deliberately contains a (never-called) function that
+# contains a single use of all the vanilla allocation/free functions. And this
+# script fails if it (a) finds uses of those functions in files other than
+# util/Utility.cpp, *or* (b) fails to find them in util/Utility.cpp.
+
# Tracks overall success of the test. Set to True by fail() and never reset.
has_failed = False


def fail(msg):
    """Report a test failure and remember that the run has failed.

    Prints ``msg`` on stdout behind the ``TEST-UNEXPECTED-FAIL`` prefix and
    sets the module-level ``has_failed`` flag to True.
    """
    global has_failed
    print("TEST-UNEXPECTED-FAIL | check_vanilla_allocations.py |", msg)
    has_failed = True
+
+
def main():
    """Scan a library for disallowed vanilla allocation/free references.

    Command line: ``[--aggressive] file``. |nm| is run on ``file`` and every
    non-"T" symbol whose name starts with one of the vanilla allocation
    function names is attributed to the object file it appears in. A
    reference from any object file other than Utility.o (or one of the
    explicitly ignored files) is reported through fail(), as is any expected
    function that is missing from Utility.o.

    Exits with status 1 if any failure was reported, 0 otherwise.
    subprocess.CalledProcessError propagates if |nm| exits with a non-zero
    status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--aggressive",
        action="store_true",
        help="also check for malloc, calloc, realloc and free",
    )
    parser.add_argument("file", type=str, help="name of the file to check")
    args = parser.parse_args()

    # Run |nm|. Options:
    #   -C: demangle symbol names
    #   -A: show an object filename for each undefined symbol
    nm = buildconfig.substs.get("NM") or "nm"
    cmd = [nm, "-C", "-A", args.file]
    lines = subprocess.check_output(
        cmd, universal_newlines=True, stderr=subprocess.PIPE
    ).split("\n")

    # alloc_fns contains all the vanilla allocation/free functions that we look
    # for. These are the literal names; regexp chars are not escaped here.

    operator_news = [
        # Matches |operator new(unsigned T)|, where |T| is |int| or |long|.
        r"operator new(unsigned",
        # Matches |operator new[](unsigned T)|, where |T| is |int| or |long|.
        r"operator new[](unsigned",
    ]

    # operator new may end up inlined and replaced with moz_xmalloc.
    inlined_operator_news = [
        r"moz_xmalloc",
    ]

    alloc_fns = (
        operator_news
        + inlined_operator_news
        + [
            r"memalign",
            # These three aren't available on all Linux configurations.
            # r'posix_memalign',
            # r'aligned_alloc',
            # r'valloc',
        ]
    )

    if args.aggressive:
        alloc_fns += [r"malloc", r"calloc", r"realloc", r"free", r"strdup"]

    # This is like alloc_fns, but regexp chars are escaped so that the names
    # can be embedded in a regular expression.
    alloc_fns_escaped = [re.escape(fn) for fn in alloc_fns]

    # This regexp matches the relevant lines in the output of |nm|, which look
    # like the following.
    #
    #   js/src/libjs_static.a:Utility.o:                  U malloc
    #   js/src/libjs_static.a:Utility.o: 00000000000007e0 T js::SetSourceOptions(...)
    #
    # It may also, in LTO builds, look like
    #   js/src/libjs_static.a:Utility.o: ---------------- T js::SetSourceOptions(...)
    #
    nm_line_re = re.compile(r"([^:/ ]+):\s*(?:[0-9a-fA-F]*|-*)\s+([TUw]) (.*)")
    alloc_fns_re = re.compile(r"|".join(alloc_fns_escaped))

    # Maps (object filename, occurrence index) to the set of allocation/free
    # function names that have been seen in it.
    functions = defaultdict(set)
    # Counts how many distinct runs of lines each object filename has had.
    files = defaultdict(int)

    # Files to ignore allocation/free functions from.
    ignored_files = [
        # Ignore implicit call to operator new in std::condition_variable_any.
        #
        # From intl/icu/source/common/umutex.h:
        # On Linux, the default constructor of std::condition_variable_any
        # produces an in-line reference to global operator new(), [...].
        "umutex.o",
        # Ignore allocations from decimal conversion functions inside mozglue.
        "Decimal.o",
        # Ignore use of std::string in regexp AST debug output.
        "regexp-ast.o",
    ]
    all_ignored_files = {(f, 1) for f in ignored_files}

    # Would it be helpful to emit detailed line number information after a failure?
    emit_line_info = False

    prev_filename = None
    for line in lines:
        line_match = nm_line_re.search(line)
        if line_match is None:
            continue

        filename, symtype, fn = line_match.groups()
        if prev_filename != filename:
            # When the same filename appears multiple times, separated by other
            # file names, this denotes a different file. Thankfully, we can more
            # or less safely assume that dir1/Foo.o and dir2/Foo.o are not going
            # to be next to each other.
            files[filename] += 1
            prev_filename = filename

        # The stdc++compat library has an implicit call to operator new in
        # thread::_M_start_thread.
        if "stdc++compat" in filename:
            continue

        # The memory allocator code contains calls to memalign. These are ok, so
        # we whitelist them.
        if "_memory_" in filename:
            continue

        # Ignore the fuzzing code imported from m-c
        if "Fuzzer" in filename:
            continue

        # Ignore the profiling pseudo-stack, since it needs to run even when
        # SpiderMonkey's allocator isn't initialized.
        if "ProfilingStack" in filename:
            continue

        if symtype == "T":
            # We can't match intl/components files by file name because in
            # non-unified builds they overlap with files in js/src.
            # So we check symbols they define, and consider files with symbols
            # in the mozilla::intl namespace to be those.
            if fn.startswith("mozilla::intl::"):
                all_ignored_files.add((filename, files[filename]))
        else:
            fn_match = alloc_fns_re.match(fn)
            if fn_match:
                functions[(filename, files[filename])].add(fn_match.group(0))

    # Default to an empty set: if nothing at all was attributed to Utility.o
    # (e.g. the |nm| output format changed), each expected function is
    # reported as missing below instead of the script dying with a KeyError.
    util_Utility_cpp = functions.pop(("Utility.o", 1), set())
    if ("Utility.o", 2) in functions:
        fail("There should be only one Utility.o file")

    for f, n in all_ignored_files:
        functions.pop((f, n), None)
        if f in ignored_files and (f, 2) in functions:
            fail(f"There should be only one {f} file")

    for filename, n in sorted(functions):
        # Sorted so that the failure output is stable from run to run.
        for fn in sorted(functions[(filename, n)]):
            # An allocation is present in a non-special file. Fail!
            fail("'" + fn + "' present in " + filename)
            # Try to give more precise information about the offending code.
            emit_line_info = True

    # Check that all functions we expect are used in util/Utility.cpp. (This
    # will fail if the function-detection code breaks at any point.)
    # operator new and its inlined equivalent are mutually exclusive.
    has_operator_news = any(fn in operator_news for fn in util_Utility_cpp)
    has_inlined_operator_news = any(
        fn in inlined_operator_news for fn in util_Utility_cpp
    )
    if has_operator_news and has_inlined_operator_news:
        fail(
            "Both operator new and moz_xmalloc aren't expected in util/Utility.cpp at the same time"
        )

    for fn in alloc_fns:
        if fn not in util_Utility_cpp:
            # A missing operator new is tolerated only when its inlined
            # replacement was seen, and vice versa; anything else missing is
            # always a failure.
            if (
                (fn in operator_news and not has_inlined_operator_news)
                or (fn in inlined_operator_news and not has_operator_news)
                or (fn not in operator_news and fn not in inlined_operator_news)
            ):
                fail("'" + fn + "' isn't used as expected in util/Utility.cpp")
        else:
            util_Utility_cpp.remove(fn)

    # This should never happen, but check just in case.
    if util_Utility_cpp:
        fail(
            "unexpected allocation fns used in util/Utility.cpp: "
            + ", ".join(sorted(util_Utility_cpp))
        )

    # If we found any improper references to allocation functions, try to use
    # DWARF debug info to get more accurate line number information about the
    # bad calls. This is a lot slower than 'nm -A', and it is not always
    # precise when building with --enable-optimized.
    if emit_line_info:
        print("check_vanilla_allocations.py: Source lines with allocation calls:")
        print(
            "check_vanilla_allocations.py: Accurate in unoptimized builds; "
            "util/Utility.cpp expected."
        )

        # Run |nm| again, using the same tool as the first pass. Options:
        #   -u: show only undefined symbols
        #   -C: demangle symbol names
        #   -l: show line number information for each undefined symbol
        cmd = [nm, "-u", "-C", "-l", args.file]
        lines = subprocess.check_output(
            cmd, universal_newlines=True, stderr=subprocess.PIPE
        ).split("\n")

        # This regexp matches the relevant lines in the output of |nm -l|,
        # which look like the following.
        #
        #       U malloc util/Utility.cpp:117
        #
        alloc_lines_re = re.compile(
            r"[Uw] ((" + r"|".join(alloc_fns_escaped) + r").*)\s+(\S+:\d+)$"
        )

        for line in lines:
            loc_match = alloc_lines_re.search(line)
            if loc_match:
                print(
                    "check_vanilla_allocations.py:",
                    loc_match.group(1),
                    "called at",
                    loc_match.group(3),
                )

    if has_failed:
        sys.exit(1)

    print("TEST-PASS | check_vanilla_allocations.py | ok")
    sys.exit(0)


if __name__ == "__main__":
    main()