diff options
Diffstat (limited to 'python/mozbuild/mozbuild/compilation/warnings.py')
-rw-r--r-- | python/mozbuild/mozbuild/compilation/warnings.py | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/python/mozbuild/mozbuild/compilation/warnings.py b/python/mozbuild/mozbuild/compilation/warnings.py new file mode 100644 index 0000000000..4f0ef57e51 --- /dev/null +++ b/python/mozbuild/mozbuild/compilation/warnings.py @@ -0,0 +1,392 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +# This modules provides functionality for dealing with compiler warnings. + +import errno +import io +import json +import os +import re + +import mozpack.path as mozpath +import six + +from mozbuild.util import hash_file + +# Regular expression to strip ANSI color sequences from a string. This is +# needed to properly analyze Clang compiler output, which may be colorized. +# It assumes ANSI escape sequences. +RE_STRIP_COLORS = re.compile(r"\x1b\[[\d;]+m") + +# This captures Clang diagnostics with the standard formatting. +RE_CLANG_WARNING_AND_ERROR = re.compile( + r""" + (?P<file>[^:]+) + : + (?P<line>\d+) + : + (?P<column>\d+) + : + \s(?P<type>warning|error):\s + (?P<message>.+) + \[(?P<flag>[^\]]+) + """, + re.X, +) + +# This captures Clang-cl warning format. +RE_CLANG_CL_WARNING_AND_ERROR = re.compile( + r""" + (?P<file>.*) + \((?P<line>\d+),(?P<column>\d+)\) + \s?:\s+(?P<type>warning|error):\s + (?P<message>.*) + \[(?P<flag>[^\]]+) + """, + re.X, +) + +IN_FILE_INCLUDED_FROM = "In file included from " + + +class CompilerWarning(dict): + """Represents an individual compiler warning.""" + + def __init__(self): + dict.__init__(self) + + self["filename"] = None + self["line"] = None + self["column"] = None + self["message"] = None + self["flag"] = None + + def copy(self): + """Returns a copy of this compiler warning.""" + w = CompilerWarning() + w.update(self) + return w + + # Since we inherit from dict, functools.total_ordering gets confused. + # Thus, we define a key function, a generic comparison, and then + # implement all the rich operators with those; approach is from: + # http://regebro.wordpress.com/2010/12/13/python-implementing-rich-comparison-the-correct-way/ + def _cmpkey(self): + return (self["filename"], self["line"], self["column"]) + + def _compare(self, other, func): + if not isinstance(other, CompilerWarning): + return NotImplemented + + return func(self._cmpkey(), other._cmpkey()) + + def __eq__(self, other): + return self._compare(other, lambda s, o: s == o) + + def __neq__(self, other): + return self._compare(other, lambda s, o: s != o) + + def __lt__(self, other): + return self._compare(other, lambda s, o: s < o) + + def __le__(self, other): + return self._compare(other, lambda s, o: s <= o) + + def __gt__(self, other): + return self._compare(other, lambda s, o: s > o) + + def __ge__(self, other): + return self._compare(other, lambda s, o: s >= o) + + def __hash__(self): + """Define so this can exist inside a set, etc.""" + return hash(tuple(sorted(self.items()))) + + +class WarningsDatabase(object): + """Holds a collection of warnings. + + The warnings database is a semi-intelligent container that holds warnings + encountered during builds. + + The warnings database is backed by a JSON file. But, that is transparent + to consumers. + + Under most circumstances, the warnings database is insert only. When a + warning is encountered, the caller simply blindly inserts it into the + database. The database figures out whether it is a dupe, etc. + + During the course of development, it is common for warnings to change + slightly as source code changes. For example, line numbers will disagree. + The WarningsDatabase handles this by storing the hash of a file a warning + occurred in. At warning insert time, if the hash of the file does not match + what is stored in the database, the existing warnings for that file are + purged from the database. + + Callers should periodically prune old, invalid warnings from the database + by calling prune(). A good time to do this is at the end of a build. + """ + + def __init__(self): + """Create an empty database.""" + self._files = {} + + def __len__(self): + i = 0 + for value in self._files.values(): + i += len(value["warnings"]) + + return i + + def __iter__(self): + for value in self._files.values(): + for warning in value["warnings"]: + yield warning + + def __contains__(self, item): + for value in self._files.values(): + for warning in value["warnings"]: + if warning == item: + return True + + return False + + @property + def warnings(self): + """All the CompilerWarning instances in this database.""" + for value in self._files.values(): + for w in value["warnings"]: + yield w + + def type_counts(self, dirpath=None): + """Returns a mapping of warning types to their counts.""" + + types = {} + for value in self._files.values(): + for warning in value["warnings"]: + if dirpath and not mozpath.normsep(warning["filename"]).startswith( + dirpath + ): + continue + flag = warning["flag"] + count = types.get(flag, 0) + count += 1 + + types[flag] = count + + return types + + def has_file(self, filename): + """Whether we have any warnings for the specified file.""" + return filename in self._files + + def warnings_for_file(self, filename): + """Obtain the warnings for the specified file.""" + f = self._files.get(filename, {"warnings": []}) + + for warning in f["warnings"]: + yield warning + + def insert(self, warning, compute_hash=True): + assert isinstance(warning, CompilerWarning) + + filename = warning["filename"] + + new_hash = None + + if compute_hash: + new_hash = hash_file(filename) + + if filename in self._files: + if new_hash != self._files[filename]["hash"]: + del self._files[filename] + + value = self._files.get( + filename, + { + "hash": new_hash, + "warnings": set(), + }, + ) + + value["warnings"].add(warning) + + self._files[filename] = value + + def prune(self): + """Prune the contents of the database. + + This removes warnings that are no longer valid. A warning is no longer + valid if the file it was in no longer exists or if the content has + changed. + + The check for changed content catches the case where a file previously + contained warnings but no longer does. + """ + + # Need to calculate up front since we are mutating original object. + filenames = list(six.iterkeys(self._files)) + for filename in filenames: + if not os.path.exists(filename): + del self._files[filename] + continue + + if self._files[filename]["hash"] is None: + continue + + current_hash = hash_file(filename) + if current_hash != self._files[filename]["hash"]: + del self._files[filename] + continue + + def serialize(self, fh): + """Serialize the database to an open file handle.""" + obj = {"files": {}} + + # All this hackery because JSON can't handle sets. + for k, v in six.iteritems(self._files): + obj["files"][k] = {} + + for k2, v2 in six.iteritems(v): + normalized = v2 + if isinstance(v2, set): + normalized = list(v2) + obj["files"][k][k2] = normalized + + to_write = six.ensure_text(json.dumps(obj, indent=2)) + fh.write(to_write) + + def deserialize(self, fh): + """Load serialized content from a handle into the current instance.""" + obj = json.load(fh) + + self._files = obj["files"] + + # Normalize data types. + for filename, value in six.iteritems(self._files): + if "warnings" in value: + normalized = set() + for d in value["warnings"]: + w = CompilerWarning() + w.update(d) + normalized.add(w) + + self._files[filename]["warnings"] = normalized + + def load_from_file(self, filename): + """Load the database from a file.""" + with io.open(filename, "r", encoding="utf-8") as fh: + self.deserialize(fh) + + def save_to_file(self, filename): + """Save the database to a file.""" + try: + # Ensure the directory exists + os.makedirs(os.path.dirname(filename)) + except OSError as e: + if e.errno != errno.EEXIST: + raise + with io.open(filename, "w", encoding="utf-8", newline="\n") as fh: + self.serialize(fh) + + +class WarningsCollector(object): + """Collects warnings from text data. + + Instances of this class receive data (usually the output of compiler + invocations) and parse it into warnings. + + The collector works by incrementally receiving data, usually line-by-line + output from the compiler. Therefore, it can maintain state to parse + multi-line warning messages. + """ + + def __init__(self, cb, objdir=None): + """Initialize a new collector. + + ``cb`` is a callable that is called with a ``CompilerWarning`` + instance whenever a new warning is parsed. + + ``objdir`` is the object directory. Used for normalizing paths. + """ + self.cb = cb + self.objdir = objdir + self.included_from = [] + + def process_line(self, line): + """Take a line of text and process it for a warning.""" + + filtered = RE_STRIP_COLORS.sub("", line) + + # Clang warnings in files included from the one(s) being compiled will + # start with "In file included from /path/to/file:line:". Here, we + # record those. + if filtered.startswith(IN_FILE_INCLUDED_FROM): + included_from = filtered[len(IN_FILE_INCLUDED_FROM) :] + + parts = included_from.split(":") + + self.included_from.append(parts[0]) + + return + + warning = CompilerWarning() + filename = None + + # TODO make more efficient so we run minimal regexp matches. + match_clang = RE_CLANG_WARNING_AND_ERROR.match(filtered) + match_clang_cl = RE_CLANG_CL_WARNING_AND_ERROR.match(filtered) + if match_clang: + d = match_clang.groupdict() + + filename = d["file"] + warning["type"] = d["type"] + warning["line"] = int(d["line"]) + warning["column"] = int(d["column"]) + warning["flag"] = d["flag"] + warning["message"] = d["message"].rstrip() + + elif match_clang_cl: + d = match_clang_cl.groupdict() + + filename = d["file"] + warning["type"] = d["type"] + warning["line"] = int(d["line"]) + warning["column"] = int(d["column"]) + warning["flag"] = d["flag"] + warning["message"] = d["message"].rstrip() + + else: + self.included_from = [] + return None + + filename = os.path.normpath(filename) + + # Sometimes we get relative includes. These typically point to files in + # the object directory. We try to resolve the relative path. + if not os.path.isabs(filename): + filename = self._normalize_relative_path(filename) + + warning["filename"] = filename + + self.cb(warning) + + return warning + + def _normalize_relative_path(self, filename): + # Special case files in dist/include. + idx = filename.find("/dist/include") + if idx != -1: + return self.objdir + filename[idx:] + + for included_from in self.included_from: + source_dir = os.path.dirname(included_from) + + candidate = os.path.normpath(os.path.join(source_dir, filename)) + + if os.path.exists(candidate): + return candidate + + return filename |