7 files changed, 552 insertions, 0 deletions
diff --git a/testing/mozbase/mozleak/mozleak/__init__.py b/testing/mozbase/mozleak/mozleak/__init__.py
new file mode 100644
index 0000000000..206806da0c
--- /dev/null
+++ b/testing/mozbase/mozleak/mozleak/__init__.py
@@ -0,0 +1,12 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+mozleak is a library for extracting memory leaks from leak log files.
+"""
+
+from .leaklog import process_leak_log
+from .lsan import LSANLeaks
+
+__all__ = ["process_leak_log", "LSANLeaks"]
diff --git a/testing/mozbase/mozleak/mozleak/leaklog.py b/testing/mozbase/mozleak/mozleak/leaklog.py
new file mode 100644
index 0000000000..8a3ee5aee3
--- /dev/null
+++ b/testing/mozbase/mozleak/mozleak/leaklog.py
@@ -0,0 +1,255 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+import re
+
+from geckoprocesstypes import process_types
+
+
+def _get_default_logger():
+    """Return mozlog's default "mozleak" logger, or a stdlib logger if unset."""
+    from mozlog import get_default_logger
+
+    mozlog_logger = get_default_logger(component="mozleak")
+    if mozlog_logger:
+        return mozlog_logger
+    import logging
+
+    return logging.getLogger(__name__)
+
+
+def process_single_leak_file(
+    leakLogFileName,
+    processType,
+    leakThreshold,
+    ignoreMissingLeaks,
+    log=None,
+    stackFixer=None,
+    scope=None,
+    allowed=None,
+):
+    """Process a single leak log and report its contents through `log`.
+
+    Lines outside the leak table are passed through stackFixer (if given).
+    A negative leakThreshold also disables the negative-leak failure.
+    allowed maps a class name to the number of its objects that may leak
+    (None for no limit). Per-class results go to log.mozleak_object and the
+    byte total (None if no TOTAL row was seen) to log.mozleak_total.
+    """
+    #     |              |Per-Inst  Leaked|     Total  Rem|
+    #   0 |TOTAL         |      17     192| 419115886    2|
+    # 833 |nsTimerImpl   |      60     120|     24726    2|
+    # 930 |Foo<Bar, Bar> |      32       8|       100    1|
+    lineRe = re.compile(
+        r"^\s*\d+ \|"
+        r"(?P<name>[^|]+)\|"
+        r"\s*(?P<size>-?\d+)\s+(?P<bytesLeaked>-?\d+)\s*\|"
+        r"\s*-?\d+\s+(?P<numLeaked>-?\d+)"
+    )
+    # The class name can contain spaces. We remove trailing whitespace later.
+
+    log = log or _get_default_logger()
+
+    if allowed is None:
+        allowed = {}
+
+    processString = "%s process:" % processType
+    crashedOnPurpose = False
+    totalBytesLeaked = None
+    leakedObjectAnalysis = []
+    leakedObjectNames = []
+    recordLeakedObjects = False
+    header = []
+    log.info("leakcheck | Processing leak log file %s" % leakLogFileName)
+
+    with open(leakLogFileName, "r") as leaks:
+        for line in leaks:
+            if "purposefully crash" in line:
+                crashedOnPurpose = True
+            matches = lineRe.match(line)
+            if not matches:
+                # eg: the leak table header row
+                strippedLine = line.rstrip()
+                logLine = stackFixer(strippedLine) if stackFixer else strippedLine
+                if recordLeakedObjects:
+                    log.info(logLine)
+                else:
+                    header.append(logLine)
+                continue
+            name = matches.group("name").rstrip()
+            size = int(matches.group("size"))
+            bytesLeaked = int(matches.group("bytesLeaked"))
+            numLeaked = int(matches.group("numLeaked"))
+            # Output the raw table line for any row that has leaked.
+            if numLeaked != 0:
+                # If this is the TOTAL line, first output the header lines.
+                if name == "TOTAL":
+                    for logLine in header:
+                        log.info(logLine)
+                log.info(line.rstrip())
+            if name == "TOTAL":
+                # We're done with the header lines, whether or not it leaked.
+                header = []
+                # Analyse the leak log, but output later or it will interrupt
+                # the leak table.
+                # Multiple default processes can write their bloat views into a
+                # single log (bug 1068869); for now, we report the largest leak.
+                if totalBytesLeaked is not None:
+                    log.warning(
+                        "leakcheck | %s "
+                        "multiple BloatView byte totals found" % processString
+                    )
+                else:
+                    totalBytesLeaked = 0
+                if bytesLeaked > totalBytesLeaked:
+                    totalBytesLeaked = bytesLeaked
+                    # Throw out the information about the previous bloat view.
+                    leakedObjectNames = []
+                    leakedObjectAnalysis = []
+                    recordLeakedObjects = True
+                else:
+                    recordLeakedObjects = False
+            if (size < 0 or bytesLeaked < 0 or numLeaked < 0) and leakThreshold >= 0:
+                log.error(
+                    "TEST-UNEXPECTED-FAIL | leakcheck | %s negative leaks caught!"
+                    % processString
+                )
+                continue
+            if name != "TOTAL" and numLeaked != 0 and recordLeakedObjects:
+                leakedObjectNames.append(name)
+                leakedObjectAnalysis.append((numLeaked, name))
+
+    for numLeaked, name in leakedObjectAnalysis:
+        leak_allowed = False
+        if name in allowed:
+            limit = allowed[name]
+            leak_allowed = limit is None or numLeaked <= limit
+        log.mozleak_object(
+            processType, numLeaked, name, scope=scope, allowed=leak_allowed
+        )
+
+    log.mozleak_total(
+        processType,
+        totalBytesLeaked,
+        leakThreshold,
+        leakedObjectNames,
+        scope=scope,
+        induced_crash=crashedOnPurpose,
+        ignore_missing=ignoreMissingLeaks,
+    )
+
+
+def process_leak_log(
+    leak_log_file,
+    leak_thresholds=None,
+    ignore_missing_leaks=None,
+    log=None,
+    stack_fixer=None,
+    scope=None,
+    allowed=None,
+):
+    """Process the leak log, including separate leak logs created
+    by child processes.
+
+    Use this function if you want an additional PASS/FAIL summary.
+    It must be used with the |XPCOM_MEM_BLOAT_LOG| environment variable.
+
+    The base of leak_log_file for a non-default process needs to end with
+      _proctype_pid12345.log
+    "proctype" is a string denoting the type of the process, which should
+    be the result of calling XRE_GeckoProcessTypeToString(). 12345 is
+    a series of digits that is the pid for the process. The .log is
+    optional.
+
+    All other file names are treated as being for default processes.
+
+    leak_thresholds should be a dict mapping process types to leak thresholds,
+    in bytes. If a process type is not present in the dict the threshold
+    will be 0. If the threshold is a negative number we additionally ignore
+    the case where there are negative leaks.
+
+    allowed - A dictionary mapping process types to dictionaries containing
+    the number of objects of that type which are allowed to leak.
+
+    scope - An identifier for the set of tests run during the browser session
+            (e.g. a directory name)
+
+    ignore_missing_leaks should be a list of process types. If a process
+    creates a leak log without a TOTAL, then we report an error if it isn't
+    in the list ignore_missing_leaks.
+
+    Returns a list of files that were processed. The caller is responsible for
+    cleaning these up.
+    """
+    log = log or _get_default_logger()
+
+    processed_files = []
+
+    leakLogFile = leak_log_file
+    if not os.path.exists(leakLogFile):
+        log.warning("leakcheck | refcount logging is off, so leaks can't be detected!")
+        return processed_files
+
+    log.info(
+        "leakcheck | Processing log file %s%s"
+        % (leakLogFile, (" for scope %s" % scope) if scope is not None else "")
+    )
+
+    leakThresholds = leak_thresholds or {}
+    ignoreMissingLeaks = ignore_missing_leaks or []
+
+    # This list is based on XRE_GeckoProcessTypeToString. ipdlunittest processes likely
+    # are not going to produce leak logs we will ever see.
+    knownProcessTypes = [
+        p.string_name for p in process_types if p.string_name != "ipdlunittest"
+    ]
+
+    for processType in knownProcessTypes:
+        log.info(
+            "TEST-INFO | leakcheck | %s process: leak threshold set at %d bytes"
+            % (processType, leakThresholds.get(processType, 0))
+        )
+
+    for processType in leakThresholds:
+        if processType not in knownProcessTypes:
+            log.error(
+                "TEST-UNEXPECTED-FAIL | leakcheck | "
+                "Unknown process type %s in leakThresholds" % processType
+            )
+
+    (leakLogFileDir, leakFileBase) = os.path.split(leakLogFile)
+    if leakFileBase.endswith(".log"):
+        leakFileBase = leakFileBase[:-4]
+        fileNameRegExp = re.compile(r"_([a-z]*)_pid\d*\.log$")
+    else:
+        fileNameRegExp = re.compile(r"_([a-z]*)_pid\d*$")
+
+    # A bare file name has an empty directory part, which os.listdir() rejects.
+    for fileName in os.listdir(leakLogFileDir or os.curdir):
+        if leakFileBase in fileName:
+            thisFile = os.path.join(leakLogFileDir, fileName)
+            m = fileNameRegExp.search(fileName)
+            if m:
+                processType = m.group(1)
+            else:
+                processType = "default"
+            if processType not in knownProcessTypes:
+                log.error(
+                    "TEST-UNEXPECTED-FAIL | leakcheck | "
+                    "Leak log with unknown process type %s" % processType
+                )
+            leakThreshold = leakThresholds.get(processType, 0)
+            process_single_leak_file(
+                thisFile,
+                processType,
+                leakThreshold,
+                processType in ignoreMissingLeaks,
+                log=log,
+                stackFixer=stack_fixer,
+                scope=scope,
+                allowed=allowed,
+            )
+            processed_files.append(thisFile)
+    return processed_files
diff --git a/testing/mozbase/mozleak/mozleak/lsan.py b/testing/mozbase/mozleak/mozleak/lsan.py
new file mode 100644
index 0000000000..f6555eff2d
--- /dev/null
+++ b/testing/mozbase/mozleak/mozleak/lsan.py
@@ -0,0 +1,220 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+
+class LSANLeaks(object):
+    """Parse the log of an LSAN build, collecting the interesting frames of the
+    allocation stack of each reported leak. Feed every log line to log(), then
+    call process() to report what was found through `logger`.
+    """
+
+    def __init__(
+        self,
+        logger,
+        scope=None,
+        allowed=None,
+        maxNumRecordedFrames=None,
+        allowAll=False,
+    ):
+        self.logger = logger
+        self.inReport = False
+        self.fatalError = False
+        self.symbolizerError = False
+        self.foundFrames = set()
+        self.recordMoreFrames = None
+        self.currStack = None
+        self.numRecordedFrames = 0
+        self.maxNumRecordedFrames = maxNumRecordedFrames or 4
+        self.summaryData = None
+        self.scope = scope
+        self.allowedMatch = None
+        self.allowAll = allowAll
+        self.sawError = False
+
+        # Don't record various allocation-related stack frames, as they do not
+        # help much to distinguish different leaks.
+        unescapedSkipList = [
+            "malloc",
+            "js_malloc",
+            "malloc_",
+            "__interceptor_malloc",
+            "moz_xmalloc",
+            "calloc",
+            "js_calloc",
+            "calloc_",
+            "__interceptor_calloc",
+            "moz_xcalloc",
+            "realloc",
+            "js_realloc",
+            "realloc_",
+            "__interceptor_realloc",
+            "moz_xrealloc",
+            "new",
+            "js::MallocProvider",
+        ]
+        self.skipListRegExp = re.compile(
+            "^" + "|".join([re.escape(f) for f in unescapedSkipList]) + "$"
+        )
+
+        self.startRegExp = re.compile(
+            r"==\d+==ERROR: LeakSanitizer: detected memory leaks"
+        )
+        self.fatalErrorRegExp = re.compile(
+            r"==\d+==LeakSanitizer has encountered a fatal error."
+        )
+        self.symbolizerOomRegExp = re.compile(
+            r"LLVMSymbolizer: error reading file: Cannot allocate memory"
+        )
+        self.stackFrameRegExp = re.compile(r"    #\d+ 0x[0-9a-f]+ in ([^(</]+)")
+        self.sysLibStackFrameRegExp = re.compile(
+            r"    #\d+ 0x[0-9a-f]+ \(([^+]+)\+0x[0-9a-f]+\)"
+        )
+        self.summaryRegexp = re.compile(
+            r"SUMMARY: AddressSanitizer: (\d+) byte\(s\) leaked in (\d+) allocation\(s\)."
+        )
+        self.rustRegexp = re.compile(r"::h[a-f0-9]{16}$")
+        self.setAllowed(allowed)
+
+    def setAllowed(self, allowedLines):
+        """Set the frame-name prefixes that mark a leak as allowed."""
+        if not allowedLines or self.allowAll:
+            self.allowedRegexp = None
+        else:
+            self.allowedRegexp = re.compile(
+                "^" + "|".join([re.escape(f) for f in allowedLines])
+            )
+
+    def log(self, line):
+        """Feed one log line to the parser; return the line to log, or None."""
+        if self.startRegExp.match(line):
+            self.inReport = True
+            # Downgrade this from an ERROR
+            self.sawError = True
+            return "LeakSanitizer: detected memory leaks"
+
+        if self.fatalErrorRegExp.match(line):
+            self.fatalError = True
+            return line
+
+        if self.symbolizerOomRegExp.match(line):
+            self.symbolizerError = True
+            return line
+
+        if not self.inReport:
+            return line
+
+        if line.startswith("Direct leak") or line.startswith("Indirect leak"):
+            self._finishStack()
+            self.recordMoreFrames = True
+            self.currStack = []
+            return line
+
+        summaryData = self.summaryRegexp.match(line)
+        if summaryData:
+            assert self.summaryData is None
+            self._finishStack()
+            self.inReport = False
+            self.summaryData = tuple(int(item) for item in summaryData.groups())
+            # Don't return the line: whether the leak is expected is decided later.
+            return
+
+        if not self.recordMoreFrames:
+            return line
+
+        stackFrame = self.stackFrameRegExp.match(line)
+        if stackFrame:
+            # Split the frame to remove any return types.
+            frame = stackFrame.group(1).split()[-1]
+            if not self.skipListRegExp.match(frame):
+                self._recordFrame(frame)
+            return line
+
+        sysLibStackFrame = self.sysLibStackFrameRegExp.match(line)
+        if sysLibStackFrame:
+            # System library frames never match the skip list, so don't check.
+            self._recordFrame(sysLibStackFrame.group(1))
+
+        # Other lines are ignored; a stack with no frames is "unknown stack".
+        return line
+
+    def process(self):
+        """Report what was parsed; return the failure count (None if allowAll)."""
+        failures = 0
+
+        if self.allowAll:
+            self.logger.info("LeakSanitizer | Leak checks disabled")
+            return
+
+        if self.summaryData:
+            allowed = all(allowed for _, allowed in self.foundFrames)
+            self.logger.lsan_summary(*self.summaryData, allowed=allowed)
+            self.summaryData = None
+
+        if self.fatalError:
+            self.logger.error(
+                "LeakSanitizer | LeakSanitizer has encountered a fatal error."
+            )
+            failures += 1
+
+        if self.symbolizerError:
+            self.logger.error(
+                "LeakSanitizer | LLVMSymbolizer was unable to allocate memory.\n"
+                "This will cause leaks that "
+                "should be ignored to instead be reported as an error"
+            )
+            failures += 1
+
+        if self.foundFrames:
+            self.logger.info(
+                "LeakSanitizer | To show the "
+                "addresses of leaked objects add report_objects=1 to LSAN_OPTIONS\n"
+                "This can be done in testing/mozbase/mozrunner/mozrunner/utils.py"
+            )
+            self.logger.info("Allowed depth was %d" % self.maxNumRecordedFrames)
+
+            for frames, allowed in self.foundFrames:
+                self.logger.lsan_leak(frames, scope=self.scope, allowed_match=allowed)
+                if not allowed:
+                    failures += 1
+
+        if self.sawError and not (
+            self.summaryData
+            or self.foundFrames
+            or self.fatalError
+            or self.symbolizerError
+        ):
+            self.logger.error(
+                "LeakSanitizer | Memory leaks detected but no leak report generated"
+            )
+
+        self.sawError = False
+
+        return failures
+
+    def _finishStack(self):
+        if self.recordMoreFrames and len(self.currStack) == 0:
+            self.currStack = ["unknown stack"]
+        if self.currStack:
+            self.foundFrames.add((tuple(self.currStack), self.allowedMatch))
+            self.currStack = None
+            self.allowedMatch = None
+        self.recordMoreFrames = False
+        self.numRecordedFrames = 0
+
+    def _recordFrame(self, frame):
+        if self.allowedMatch is None and self.allowedRegexp is not None:
+            self.allowedMatch = frame if self.allowedRegexp.match(frame) else None
+        frame = self._cleanFrame(frame)
+        self.currStack.append(frame)
+        self.numRecordedFrames += 1
+        if self.numRecordedFrames >= self.maxNumRecordedFrames:
+            self.recordMoreFrames = False
+
+    def _cleanFrame(self, frame):
+        # Rust frames aren't properly demangled and in particular can contain
+        # some trailing junk of the form ::h[a-f0-9]{16} that changes with
+        # compiler versions; see bug 1507350.
+        return self.rustRegexp.sub("", frame)
diff --git a/testing/mozbase/mozleak/setup.cfg b/testing/mozbase/mozleak/setup.cfg
new file mode 100644
index 0000000000..3c6e79cf31
--- /dev/null
+++ b/testing/mozbase/mozleak/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+universal=1
diff --git a/testing/mozbase/mozleak/setup.py b/testing/mozbase/mozleak/setup.py
new file mode 100644
index 0000000000..0c1ecb74a2
--- /dev/null
+++ b/testing/mozbase/mozleak/setup.py
@@ -0,0 +1,29 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from setuptools import setup
+
+PACKAGE_NAME = "mozleak"
+PACKAGE_VERSION = "1.0.0"
+
+
+setup(
+    name=PACKAGE_NAME,
+    version=PACKAGE_VERSION,
+    description="Library for extracting memory leaks from leak logs files",
+    long_description="see https://firefox-source-docs.mozilla.org/mozbase/index.html",
+    # Classifier strings: http://pypi.python.org/pypi?%3Aaction=list_classifiers
+    classifiers=[
+        "Programming Language :: Python :: 2.7",
+        "Programming Language :: Python :: 3.5",
+    ],
+    keywords="mozilla",
+    author="Mozilla Automation and Tools team",
+    author_email="tools@lists.mozilla.org",
+    url="https://wiki.mozilla.org/Auto-tools/Projects/Mozbase",
+    license="MPL",
+    packages=["mozleak"],
+    zip_safe=False,
+    install_requires=[],
+)
diff --git a/testing/mozbase/mozleak/tests/manifest.toml b/testing/mozbase/mozleak/tests/manifest.toml
new file mode 100644
index 0000000000..133b0581e6
--- /dev/null
+++ b/testing/mozbase/mozleak/tests/manifest.toml
@@ -0,0 +1,4 @@
+[DEFAULT]
+subsuite = "mozbase"
+
+["test_lsan.py"]
diff --git a/testing/mozbase/mozleak/tests/test_lsan.py b/testing/mozbase/mozleak/tests/test_lsan.py
new file mode 100644
index 0000000000..6a55a555b7
--- /dev/null
+++ b/testing/mozbase/mozleak/tests/test_lsan.py
@@ -0,0 +1,30 @@
+import mozunit
+import pytest
+from mozleak import lsan
+
+
+# A mangled Rust allocator frame, without the trailing hash.
+_FRAME = (
+    "alloc_system::platform::_$LT$impl$u20$core..alloc.."
+    "GlobalAlloc$u20$for$u20$alloc_system..System$GT$::alloc"
+)
+
+
+@pytest.mark.parametrize(
+    ("input_", "expected"),
+    [
+        # A trailing ::h<16 hex digits> suffix is stripped.
+        (_FRAME + "::h5a1f0db41e296502", _FRAME),
+        # A frame without that suffix is returned unchanged.
+        (_FRAME, _FRAME),
+    ],
+)
+def test_clean(input_, expected):
+    """Check _cleanFrame against a frame with and without a Rust hash."""
+    leaks = lsan.LSANLeaks(None)
+    cleaned = leaks._cleanFrame(input_)
+    assert cleaned == expected
+
+
+if __name__ == "__main__":
+    mozunit.main()