diff options
Diffstat (limited to 'third_party/python/compare-locales/compare_locales/checks')
6 files changed, 1185 insertions, 0 deletions
diff --git a/third_party/python/compare-locales/compare_locales/checks/__init__.py b/third_party/python/compare-locales/compare_locales/checks/__init__.py new file mode 100644 index 0000000000..0c81a4b715 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/__init__.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +from .base import Checker, EntityPos +from .android import AndroidChecker +from .dtd import DTDChecker +from .fluent import FluentChecker +from .properties import PropertiesChecker + + +__all__ = [ + 'Checker', 'EntityPos', + 'AndroidChecker', 'DTDChecker', 'FluentChecker', 'PropertiesChecker', +] + + +def getChecker(file, extra_tests=None): + if PropertiesChecker.use(file): + return PropertiesChecker(extra_tests, locale=file.locale) + if DTDChecker.use(file): + return DTDChecker(extra_tests, locale=file.locale) + if FluentChecker.use(file): + return FluentChecker(extra_tests, locale=file.locale) + if AndroidChecker.use(file): + return AndroidChecker(extra_tests, locale=file.locale) + return Checker(extra_tests, locale=file.locale) diff --git a/third_party/python/compare-locales/compare_locales/checks/android.py b/third_party/python/compare-locales/compare_locales/checks/android.py new file mode 100644 index 0000000000..9791c49a4f --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/android.py @@ -0,0 +1,253 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import +from __future__ import unicode_literals + +import re +from xml.dom import minidom + +from .base import Checker +from ..parser.android import textContent + + +class AndroidChecker(Checker): + pattern = re.compile('(.*)?strings.*\\.xml$') + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + ''' + for encoding_trouble in super( + AndroidChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + refNode = refEnt.node + l10nNode = l10nEnt.node + # Apples and oranges, error out. + if refNode.nodeName != l10nNode.nodeName: + yield ("error", 0, "Incompatible resource types", "android") + return + # Once we start parsing more resource types, make sure to add checks + # for them. + if refNode.nodeName != "string": + yield ("warning", 0, "Unsupported resource type", "android") + return + for report_tuple in self.check_string([refNode], l10nEnt): + yield report_tuple + + def check_string(self, refs, l10nEnt): + '''Check a single string literal against a list of references. + + There should be multiple nodes given for <plurals> or <string-array>. 
+ ''' + l10n = l10nEnt.node + if self.not_translatable(l10n, *refs): + yield ( + "error", + 0, + "strings must be translatable", + "android" + ) + return + if self.no_at_string(l10n): + yield ( + "error", + 0, + "strings must be translatable", + "android" + ) + return + if self.no_at_string(*refs): + yield ( + "warning", + 0, + "strings must be translatable", + "android" + ) + if self.non_simple_data(l10n): + yield ( + "error", + 0, + "Only plain text allowed, " + "or one CDATA surrounded by whitespace", + "android" + ) + return + for report_tuple in check_apostrophes(l10nEnt.val): + yield report_tuple + + params, errors = get_params(refs) + for error, pos in errors: + yield ( + "warning", + pos, + error, + "android" + ) + if params: + for report_tuple in check_params(params, l10nEnt.val): + yield report_tuple + + def not_translatable(self, *nodes): + return any( + node.hasAttribute("translatable") + and node.getAttribute("translatable") == "false" + for node in nodes + ) + + def no_at_string(self, *ref_nodes): + '''Android allows to reference other strings by using + @string/identifier + instead of the actual value. Those references don't belong into + a localizable file, warn on that. + ''' + return any( + textContent(node).startswith('@string/') + for node in ref_nodes + ) + + def non_simple_data(self, node): + '''Only allow single text nodes, or, a single CDATA node + surrounded by whitespace. 
+ ''' + cdata = [ + child + for child in node.childNodes + if child.nodeType == minidom.Node.CDATA_SECTION_NODE + ] + if len(cdata) == 0: + if node.childNodes.length == 0: + # empty translation is OK + return False + if node.childNodes.length != 1: + return True + return node.childNodes[0].nodeType != minidom.Node.TEXT_NODE + if len(cdata) > 1: + return True + for child in node.childNodes: + if child == cdata[0]: + continue + if child.nodeType != minidom.Node.TEXT_NODE: + return True + if child.data.strip() != "": + return True + return False + + +silencer = re.compile(r'\\.|""') + + +def check_apostrophes(string): + '''Check Android logic for quotes and apostrophes. + + If you have an apostrophe (') in your string, you must either escape it + with a backslash (\') or enclose the string in double-quotes ("). + + Unescaped quotes are not visually shown on Android, but they're + also harmless, so we're not checking for quotes. We might do once we're + better at checking for inline XML, which is full of quotes. + Pairing quotes as in '""' is bad, though, so report errors for that. + Mostly, because it's hard to tell if a string is consider quoted or not + by Android in the end. + + https://developer.android.com/guide/topics/resources/string-resource#escaping_quotes + ''' + for m in re.finditer('""', string): + yield ( + "error", + m.start(), + "Double straight quotes not allowed", + "android" + ) + string = silencer.sub(" ", string) + + is_quoted = string.startswith('"') and string.endswith('"') + if not is_quoted: + # apostrophes need to be escaped + for m in re.finditer("'", string): + yield ( + "error", + m.start(), + "Apostrophe must be escaped", + "android" + ) + + +def get_params(refs): + '''Get printf parameters and internal errors. + + Returns a sparse map of positions to formatter, and a list + of errors. Errors covered so far are mismatching formatters. 
+ ''' + params = {} + errors = [] + next_implicit = 1 + for ref in refs: + if isinstance(ref, minidom.Node): + ref = textContent(ref) + for m in re.finditer(r'%(?P<order>[1-9]\$)?(?P<format>[sSd])', ref): + order = m.group('order') + if order: + order = int(order[0]) + else: + order = next_implicit + next_implicit += 1 + fmt = m.group('format') + if order not in params: + params[order] = fmt + else: + # check for consistency errors + if params[order] == fmt: + continue + msg = "Conflicting formatting, %{order}${f1} vs %{order}${f2}" + errors.append(( + msg.format(order=order, f1=fmt, f2=params[order]), + m.start() + )) + return params, errors + + +def check_params(params, string): + '''Compare the printf parameters in the given string to the reference + parameters. + + Also yields errors that are internal to the parameters inside string, + as found by `get_params`. + ''' + lparams, errors = get_params([string]) + for error, pos in errors: + yield ( + "error", + pos, + error, + "android" + ) + # Compare reference for each localized parameter. + # If there's no reference found, error, as an out-of-bounds + # parameter crashes. + # This assumes that all parameters are actually used in the reference, + # which should be OK. + # If there's a mismatch in the formatter, error. + for order in sorted(lparams): + if order not in params: + yield ( + "error", + 0, + "Formatter %{}${} not found in reference".format( + order, lparams[order] + ), + "android" + ) + elif params[order] != lparams[order]: + yield ( + "error", + 0, + "Mismatching formatter", + "android" + ) diff --git a/third_party/python/compare-locales/compare_locales/checks/base.py b/third_party/python/compare-locales/compare_locales/checks/base.py new file mode 100644 index 0000000000..3b04caa7a9 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/base.py @@ -0,0 +1,127 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +import re +import six + + +class EntityPos(int): + pass + + +mochibake = re.compile('\ufffd') + + +class Checker(object): + '''Abstract class to implement checks per file type. + ''' + pattern = None + # if a check uses all reference entities, set this to True + needs_reference = False + + @classmethod + def use(cls, file): + return cls.pattern.match(file.file) + + def __init__(self, extra_tests, locale=None): + self.extra_tests = extra_tests + self.locale = locale + self.reference = None + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + + By default, check for possible encoding errors. + ''' + for m in mochibake.finditer(l10nEnt.all): + yield ( + "warning", + EntityPos(m.start()), + "\ufffd in: {}".format(l10nEnt.key), + "encodings" + ) + + def set_reference(self, reference): + '''Set the reference entities. + Only do this if self.needs_reference is True. 
+ ''' + self.reference = reference + + +class CSSCheckMixin(object): + def maybe_style(self, ref_value, l10n_value): + ref_map, _ = self.parse_css_spec(ref_value) + if not ref_map: + return + l10n_map, errors = self.parse_css_spec(l10n_value) + for t in self.check_style(ref_map, l10n_map, errors): + yield t + + def check_style(self, ref_map, l10n_map, errors): + if not l10n_map: + yield ('error', 0, 'reference is a CSS spec', 'css') + return + if errors: + yield ('error', 0, 'reference is a CSS spec', 'css') + return + msgs = [] + for prop, unit in l10n_map.items(): + if prop not in ref_map: + msgs.insert(0, '%s only in l10n' % prop) + continue + else: + ref_unit = ref_map.pop(prop) + if unit != ref_unit: + msgs.append("units for %s don't match " + "(%s != %s)" % (prop, unit, ref_unit)) + for prop in six.iterkeys(ref_map): + msgs.insert(0, '%s only in reference' % prop) + if msgs: + yield ('warning', 0, ', '.join(msgs), 'css') + + def parse_css_spec(self, val): + if not hasattr(self, '_css_spec'): + self._css_spec = re.compile( + r'(?:' + r'(?P<prop>(?:min\-|max\-)?(?:width|height))' + r'[ \t\r\n]*:[ \t\r\n]*' + r'(?P<length>[0-9]+|[0-9]*\.[0-9]+)' + r'(?P<unit>ch|em|ex|rem|px|cm|mm|in|pc|pt)' + r')' + r'|\Z' + ) + self._css_sep = re.compile(r'[ \t\r\n]*(?P<semi>;)?[ \t\r\n]*$') + refMap = errors = None + end = 0 + for m in self._css_spec.finditer(val): + if end == 0 and m.start() == m.end(): + # no CSS spec found, just immediately end of string + return None, None + if m.start() > end: + split = self._css_sep.match(val, end, m.start()) + if split is None: + errors = errors or [] + errors.append({ + 'pos': end, + 'code': 'css-bad-content', + }) + elif end > 0 and split.group('semi') is None: + errors = errors or [] + errors.append({ + 'pos': end, + 'code': 'css-missing-semicolon', + }) + if m.group('prop'): + refMap = refMap or {} + refMap[m.group('prop')] = m.group('unit') + end = m.end() + return refMap, errors diff --git 
a/third_party/python/compare-locales/compare_locales/checks/dtd.py b/third_party/python/compare-locales/compare_locales/checks/dtd.py new file mode 100644 index 0000000000..37d3c7846d --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/dtd.py @@ -0,0 +1,246 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re +from xml import sax +import six + +from compare_locales.parser import DTDParser +from .base import Checker, CSSCheckMixin + + +class DTDChecker(Checker, CSSCheckMixin): + """Tests to run on DTD files. + + Uses xml.sax for the heavy lifting of xml parsing. + + The code tries to parse until it doesn't find any unresolved entities + anymore. If it finds one, it tries to grab the key, and adds an empty + <!ENTITY key ""> definition to the header. + + Also checks for some CSS and number heuristics in the values. 
+ """ + pattern = re.compile(r'.*\.dtd$') + needs_reference = True # to cast a wider net for known entity references + + eref = re.compile('&(%s);' % DTDParser.Name) + tmpl = b'''<!DOCTYPE elem [%s]> +<elem>%s</elem> +''' + xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot')) + + def __init__(self, extra_tests, locale=None): + super(DTDChecker, self).__init__(extra_tests, locale=locale) + self.processContent = False + if self.extra_tests is not None and 'android-dtd' in self.extra_tests: + self.processContent = True + self.__known_entities = None + + def known_entities(self, refValue): + if self.__known_entities is None and self.reference is not None: + self.__known_entities = set() + for ent in self.reference.values(): + self.__known_entities.update( + self.entities_for_value(ent.raw_val)) + return self.__known_entities if self.__known_entities is not None \ + else self.entities_for_value(refValue) + + def entities_for_value(self, value): + reflist = set(m.group(1) + for m in self.eref.finditer(value)) + reflist -= self.xmllist + return reflist + + # Setup for XML parser, with default and text-only content handler + class TextContent(sax.handler.ContentHandler): + textcontent = '' + + def characters(self, content): + self.textcontent += content + + defaulthandler = sax.handler.ContentHandler() + texthandler = TextContent() + + numPattern = r'([0-9]+|[0-9]*\.[0-9]+)' + num = re.compile('^%s$' % numPattern) + lengthPattern = '%s(em|px|ch|cm|in)' % numPattern + length = re.compile('^%s$' % lengthPattern) + + def check(self, refEnt, l10nEnt): + """Try to parse the refvalue inside a dummy element, and keep + track of entities that we need to define to make that work. + + Return a checker that offers just those entities. + """ + for encoding_trouble in super( + DTDChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + refValue, l10nValue = refEnt.raw_val, l10nEnt.raw_val + # find entities the refValue references, + # reusing markup from DTDParser. 
+ reflist = self.known_entities(refValue) + inContext = self.entities_for_value(refValue) + entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist)) + parser = sax.make_parser() + parser.setFeature(sax.handler.feature_external_ges, False) + + parser.setContentHandler(self.defaulthandler) + try: + parser.parse( + six.BytesIO(self.tmpl % + (entities.encode('utf-8'), + refValue.encode('utf-8')))) + # also catch stray % + parser.parse( + six.BytesIO(self.tmpl % + ((refEnt.all + entities).encode('utf-8'), + b'&%s;' % refEnt.key.encode('utf-8')))) + except sax.SAXParseException as e: + e # noqa + yield ('warning', + (0, 0), + "can't parse en-US value", 'xmlparse') + + # find entities the l10nValue references, + # reusing markup from DTDParser. + l10nlist = self.entities_for_value(l10nValue) + missing = sorted(l10nlist - reflist) + _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing) + if self.processContent: + self.texthandler.textcontent = '' + parser.setContentHandler(self.texthandler) + try: + parser.parse(six.BytesIO(self.tmpl % (_entities.encode('utf-8'), + l10nValue.encode('utf-8')))) + # also catch stray % + # if this fails, we need to substract the entity definition + parser.setContentHandler(self.defaulthandler) + parser.parse( + six.BytesIO(self.tmpl % + ((l10nEnt.all + _entities).encode('utf-8'), + b'&%s;' % l10nEnt.key.encode('utf-8')))) + except sax.SAXParseException as e: + # xml parse error, yield error + # sometimes, the error is reported on our fake closing + # element, make that the end of the last line + lnr = e.getLineNumber() - 1 + lines = l10nValue.splitlines() + if lnr > len(lines): + lnr = len(lines) + col = len(lines[lnr-1]) + else: + col = e.getColumnNumber() + if lnr == 1: + # first line starts with <elem>, substract + col -= len("<elem>") + elif lnr == 0: + col -= len("<!DOCTYPE elem [") # first line is DOCTYPE + yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse') + + warntmpl = u'Referencing unknown entity 
`%s`' + if reflist: + if inContext: + elsewhere = reflist - inContext + warntmpl += ' (%s used in context' % \ + ', '.join(sorted(inContext)) + if elsewhere: + warntmpl += ', %s known)' % ', '.join(sorted(elsewhere)) + else: + warntmpl += ')' + else: + warntmpl += ' (%s known)' % ', '.join(sorted(reflist)) + for key in missing: + yield ('warning', (0, 0), warntmpl % key, + 'xmlparse') + if inContext and l10nlist and l10nlist - inContext - set(missing): + mismatch = sorted(l10nlist - inContext - set(missing)) + for key in mismatch: + yield ('warning', (0, 0), + 'Entity %s referenced, but %s used in context' % ( + key, + ', '.join(sorted(inContext)) + ), 'xmlparse') + + # Number check + if self.num.match(refValue) and not self.num.match(l10nValue): + yield ('warning', 0, 'reference is a number', 'number') + # CSS checks + # just a length, width="100em" + if self.length.match(refValue) and not self.length.match(l10nValue): + yield ('error', 0, 'reference is a CSS length', 'css') + # Check for actual CSS style attribute values + for t in self.maybe_style(refValue, l10nValue): + yield t + + if self.extra_tests is not None and 'android-dtd' in self.extra_tests: + for t in self.processAndroidContent(self.texthandler.textcontent): + yield t + + quoted = re.compile("(?P<q>[\"']).*(?P=q)$") + + def unicode_escape(self, str): + """Helper method to try to decode all unicode escapes in a string. + + This code uses the standard python decode for unicode-escape, but + that's somewhat tricky, as its input needs to be ascii. To get to + ascii, the unicode string gets converted to ascii with + backslashreplace, i.e., all non-ascii unicode chars get unicode + escaped. And then we try to roll all of that back. + Now, when that hits an error, that's from the original string, and we + need to search for the actual error position in the original string, + as the backslashreplace code changes string positions quite badly. 
+ See also the last check in TestAndroid.test_android_dtd, with a + lengthy chinese string. + """ + val = str.encode('ascii', 'backslashreplace') + try: + val.decode('unicode-escape') + except UnicodeDecodeError as e: + args = list(e.args) + badstring = args[1][args[2]:args[3]] + i = len(args[1][:args[2]].decode('unicode-escape')) + args[2] = i + args[3] = i + len(badstring) + raise UnicodeDecodeError(*args) + + def processAndroidContent(self, val): + """Check for the string values that Android puts into an XML container. + + http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling # noqa + + Check for unicode escapes and unescaped quotes and apostrophes, + if string's not quoted. + """ + # first, try to decode unicode escapes + try: + self.unicode_escape(val) + except UnicodeDecodeError as e: + yield ('error', e.args[2], e.args[4], 'android') + # check for unescaped single or double quotes. + # first, see if the complete string is single or double quoted, + # that changes the rules + m = self.quoted.match(val) + if m: + q = m.group('q') + offset = 0 + val = val[1:-1] # strip quotes + else: + q = "[\"']" + offset = -1 + stray_quot = re.compile(r"[\\\\]*(%s)" % q) + + for m in stray_quot.finditer(val): + if len(m.group(0)) % 2: + # found an unescaped single or double quote, which message? + if m.group(1) == '"': + msg = "Quotes in Android DTDs need escaping with \\\" "\ + "or \\u0022, or put string in apostrophes." + else: + msg = "Apostrophes in Android DTDs need escaping with "\ + "\\' or \\u0027, or use \u2019, or put string in "\ + "quotes." 
+ yield ('error', m.end(0)+offset, msg, 'android') diff --git a/third_party/python/compare-locales/compare_locales/checks/fluent.py b/third_party/python/compare-locales/compare_locales/checks/fluent.py new file mode 100644 index 0000000000..feb7242fb7 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/fluent.py @@ -0,0 +1,356 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re +from collections import defaultdict + +from fluent.syntax import ast as ftl +from fluent.syntax.serializer import serialize_variant_key +from fluent.syntax.visitor import Visitor + +from .base import Checker, CSSCheckMixin +from compare_locales import plurals + + +MSGS = { + 'missing-msg-ref': 'Missing message reference: {ref}', + 'missing-term-ref': 'Missing term reference: {ref}', + 'obsolete-msg-ref': 'Obsolete message reference: {ref}', + 'obsolete-term-ref': 'Obsolete term reference: {ref}', + 'duplicate-attribute': 'Attribute "{name}" is duplicated', + 'missing-value': 'Missing value', + 'obsolete-value': 'Obsolete value', + 'missing-attribute': 'Missing attribute: {name}', + 'obsolete-attribute': 'Obsolete attribute: {name}', + 'duplicate-variant': 'Variant key "{name}" is duplicated', + 'missing-plural': 'Plural categories missing: {categories}', + 'plain-message': '{message}', +} + + +def pattern_variants(pattern): + """Get variants of plain text of a pattern. + + For now, just return simple text patterns. + This can be improved to allow for SelectExpressions + of simple text patterns, or even nested expressions, and Literals. + Variants with Variable-, Message-, or TermReferences should be ignored. 
+ """ + elements = pattern.elements + if len(elements) == 1: + if isinstance(elements[0], ftl.TextElement): + return [elements[0].value] + return [] + + +class ReferenceMessageVisitor(Visitor, CSSCheckMixin): + def __init__(self): + # References to Messages, their Attributes, and Terms + # Store reference name and type + self.entry_refs = defaultdict(dict) + # The currently active references + self.refs = {} + # Start with the Entry value (associated with None) + self.entry_refs[None] = self.refs + # If we're a messsage, store if there was a value + self.message_has_value = False + # Map attribute names to positions + self.attribute_positions = {} + # Map of CSS style attribute properties and units + self.css_styles = None + self.css_errors = None + + def generic_visit(self, node): + if isinstance( + node, + (ftl.Span, ftl.Annotation, ftl.BaseComment) + ): + return + super(ReferenceMessageVisitor, self).generic_visit(node) + + def visit_Message(self, node): + if node.value is not None: + self.message_has_value = True + super(ReferenceMessageVisitor, self).generic_visit(node) + + def visit_Attribute(self, node): + self.attribute_positions[node.id.name] = node.span.start + old_refs = self.refs + self.refs = self.entry_refs[node.id.name] + super(ReferenceMessageVisitor, self).generic_visit(node) + self.refs = old_refs + if node.id.name != 'style': + return + text_values = pattern_variants(node.value) + if not text_values: + self.css_styles = 'skip' + return + # right now, there's just one possible text value + self.css_styles, self.css_errors = self.parse_css_spec(text_values[0]) + + def visit_SelectExpression(self, node): + # optimize select expressions to only go through the variants + self.visit(node.variants) + + def visit_MessageReference(self, node): + ref = node.id.name + if node.attribute: + ref += '.' 
+ node.attribute.name + self.refs[ref] = 'msg-ref' + + def visit_TermReference(self, node): + # only collect term references, but not attributes of terms + if node.attribute: + return + self.refs['-' + node.id.name] = 'term-ref' + + +class GenericL10nChecks(object): + '''Helper Mixin for checks shared between Terms and Messages.''' + def check_duplicate_attributes(self, node): + warned = set() + for left in range(len(node.attributes) - 1): + if left in warned: + continue + left_attr = node.attributes[left] + warned_left = False + for right in range(left+1, len(node.attributes)): + right_attr = node.attributes[right] + if left_attr.id.name == right_attr.id.name: + if not warned_left: + warned_left = True + self.messages.append( + ( + 'warning', left_attr.span.start, + MSGS['duplicate-attribute'].format( + name=left_attr.id.name + ) + ) + ) + warned.add(right) + self.messages.append( + ( + 'warning', right_attr.span.start, + MSGS['duplicate-attribute'].format( + name=left_attr.id.name + ) + ) + ) + + def check_variants(self, variants): + # Check for duplicate variants + warned = set() + for left in range(len(variants) - 1): + if left in warned: + continue + left_key = variants[left].key + key_string = None + for right in range(left+1, len(variants)): + if left_key.equals(variants[right].key): + if key_string is None: + key_string = serialize_variant_key(left_key) + self.messages.append( + ( + 'warning', left_key.span.start, + MSGS['duplicate-variant'].format( + name=key_string + ) + ) + ) + warned.add(right) + self.messages.append( + ( + 'warning', variants[right].key.span.start, + MSGS['duplicate-variant'].format( + name=key_string + ) + ) + ) + # Check for plural categories + known_plurals = plurals.get_plural(self.locale) + if known_plurals: + known_plurals = set(known_plurals) + # Ask for known plurals, but check for plurals w/out `other`. + # `other` is used for all kinds of things. 
+ check_plurals = known_plurals.copy() + check_plurals.discard('other') + given_plurals = set(serialize_variant_key(v.key) for v in variants) + if given_plurals & check_plurals: + missing_plurals = sorted(known_plurals - given_plurals) + if missing_plurals: + self.messages.append( + ( + 'warning', variants[0].key.span.start, + MSGS['missing-plural'].format( + categories=', '.join(missing_plurals) + ) + ) + ) + + +class L10nMessageVisitor(GenericL10nChecks, ReferenceMessageVisitor): + def __init__(self, locale, reference): + super(L10nMessageVisitor, self).__init__() + self.locale = locale + # Overload refs to map to sets, just store what we found + # References to Messages, their Attributes, and Terms + # Store reference name and type + self.entry_refs = defaultdict(set) + # The currently active references + self.refs = set() + # Start with the Entry value (associated with None) + self.entry_refs[None] = self.refs + self.reference = reference + self.reference_refs = reference.entry_refs[None] + self.messages = [] + + def visit_Message(self, node): + self.check_duplicate_attributes(node) + super(L10nMessageVisitor, self).visit_Message(node) + if self.message_has_value and not self.reference.message_has_value: + self.messages.append( + ('error', node.value.span.start, MSGS['obsolete-value']) + ) + if not self.message_has_value and self.reference.message_has_value: + self.messages.append( + ('error', 0, MSGS['missing-value']) + ) + ref_attrs = set(self.reference.attribute_positions) + l10n_attrs = set(self.attribute_positions) + for missing_attr in ref_attrs - l10n_attrs: + self.messages.append( + ( + 'error', 0, + MSGS['missing-attribute'].format(name=missing_attr) + ) + ) + for obs_attr in l10n_attrs - ref_attrs: + self.messages.append( + ( + 'error', self.attribute_positions[obs_attr], + MSGS['obsolete-attribute'].format(name=obs_attr) + ) + ) + + def visit_Term(self, node): + raise RuntimeError("Should not use L10nMessageVisitor for Terms") + + def 
visit_Attribute(self, node): + old_reference_refs = self.reference_refs + self.reference_refs = self.reference.entry_refs[node.id.name] + super(L10nMessageVisitor, self).visit_Attribute(node) + self.reference_refs = old_reference_refs + if node.id.name != 'style' or self.css_styles == 'skip': + return + ref_styles = self.reference.css_styles + if ref_styles in ('skip', None): + # Reference is complex, l10n isn't. + # Let's still validate the css spec. + ref_styles = {} + for cat, msg, pos, _ in self.check_style( + ref_styles, + self.css_styles, + self.css_errors + ): + self.messages.append((cat, msg, pos)) + + def visit_SelectExpression(self, node): + super(L10nMessageVisitor, self).visit_SelectExpression(node) + self.check_variants(node.variants) + + def visit_MessageReference(self, node): + ref = node.id.name + if node.attribute: + ref += '.' + node.attribute.name + self.refs.add(ref) + self.check_obsolete_ref(node, ref, 'msg-ref') + + def visit_TermReference(self, node): + if node.attribute: + return + ref = '-' + node.id.name + self.refs.add(ref) + self.check_obsolete_ref(node, ref, 'term-ref') + + def check_obsolete_ref(self, node, ref, ref_type): + if ref not in self.reference_refs: + self.messages.append( + ( + 'warning', node.span.start, + MSGS['obsolete-' + ref_type].format(ref=ref), + ) + ) + + +class TermVisitor(GenericL10nChecks, Visitor): + def __init__(self, locale): + super(TermVisitor, self).__init__() + self.locale = locale + self.messages = [] + + def generic_visit(self, node): + if isinstance( + node, + (ftl.Span, ftl.Annotation, ftl.BaseComment) + ): + return + super(TermVisitor, self).generic_visit(node) + + def visit_Message(self, node): + raise RuntimeError("Should not use TermVisitor for Messages") + + def visit_Term(self, node): + self.check_duplicate_attributes(node) + super(TermVisitor, self).generic_visit(node) + + def visit_SelectExpression(self, node): + super(TermVisitor, self).generic_visit(node) + self.check_variants(node.variants) 
+ + +class FluentChecker(Checker): + '''Tests to run on Fluent (FTL) files. + ''' + pattern = re.compile(r'.*\.ftl') + + def check_message(self, ref_entry, l10n_entry): + '''Run checks on localized messages against reference message.''' + ref_data = ReferenceMessageVisitor() + ref_data.visit(ref_entry) + l10n_data = L10nMessageVisitor(self.locale, ref_data) + l10n_data.visit(l10n_entry) + + messages = l10n_data.messages + for attr_or_val, refs in ref_data.entry_refs.items(): + for ref, ref_type in refs.items(): + if ref not in l10n_data.entry_refs[attr_or_val]: + msg = MSGS['missing-' + ref_type].format(ref=ref) + messages.append(('warning', 0, msg)) + return messages + + def check_term(self, l10n_entry): + '''Check localized terms.''' + l10n_data = TermVisitor(self.locale) + l10n_data.visit(l10n_entry) + return l10n_data.messages + + def check(self, refEnt, l10nEnt): + for encoding_trouble in super( + FluentChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + l10n_entry = l10nEnt.entry + if isinstance(l10n_entry, ftl.Message): + ref_entry = refEnt.entry + messages = self.check_message(ref_entry, l10n_entry) + elif isinstance(l10n_entry, ftl.Term): + messages = self.check_term(l10n_entry) + + messages.sort(key=lambda t: t[1]) + for cat, pos, msg in messages: + if pos: + pos = pos - l10n_entry.span.start + yield (cat, pos, msg, 'fluent') diff --git a/third_party/python/compare-locales/compare_locales/checks/properties.py b/third_party/python/compare-locales/compare_locales/checks/properties.py new file mode 100644 index 0000000000..9ff2e4cdae --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/properties.py @@ -0,0 +1,173 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
from __future__ import absolute_import
from __future__ import unicode_literals
import re
from difflib import SequenceMatcher
from six.moves import range
from six.moves import zip

from compare_locales.parser import PropertiesEntity
from compare_locales import plurals
from .base import Checker


class PrintfException(Exception):
    '''Raised by getPrintfSpecs for a malformed printf-style value.

    Carries the description in `msg` and the offset within the value
    in `pos`.
    '''
    def __init__(self, msg, pos):
        self.msg = msg
        self.pos = pos


class PropertiesChecker(Checker):
    '''Tests to run on .properties files.
    '''
    pattern = re.compile(r'.*\.properties$')
    # Matches a `%` optionally followed by either another `%` or a
    # conversion with optional `N$` position, width and precision.
    # The `good` group is unset for a lone `%`.
    printf = re.compile(r'%(?P<good>%|'
                        r'(?:(?P<number>[1-9][0-9]*)\$)?'
                        r'(?P<width>\*|[0-9]+)?'
                        r'(?P<prec>\.(?:\*|[0-9]+)?)?'
                        r'(?P<spec>[duxXosScpfg]))?')

    def check(self, refEnt, l10nEnt):
        '''Test for the different variable formats.

        Generator yielding the base class reports first. Values that are
        documented as PluralForm.jsm plurals only get the plural checks.
        Everything else is checked for unknown escape sequences and, if
        the reference contains printf specs, for matching printf specs.
        '''
        parent_reports = super(PropertiesChecker, self).check(
            refEnt, l10nEnt
        )
        for encoding_trouble in parent_reports:
            yield encoding_trouble
        ref_text = refEnt.val
        l10n_text = l10nEnt.val
        # PluralForm.jsm strings should have the docs in the comment.
        # That also covers intl.properties' pluralRule, so leave out
        # entities with that key, and values that are just numbers.
        doc_comment = refEnt.pre_comment
        uses_plural_form = (
            doc_comment
            and 'Localization_and_Plurals' in doc_comment.all
            and refEnt.key != 'pluralRule'
            and not re.match(r'\d+$', ref_text)
        )
        if uses_plural_form:
            for msg_tuple in self.check_plural(ref_text, l10n_text):
                yield msg_tuple
            return
        # Look for escapes that got lost in the raw localized value.
        for found in PropertiesEntity.escape.finditer(l10nEnt.raw_val):
            single = found.group('single')
            if not single or single in PropertiesEntity.known_escapes:
                continue
            yield ('warning', found.start(),
                   'unknown escape sequence, \\' + single,
                   'escape')
        try:
            ref_specs = self.getPrintfSpecs(ref_text)
        except PrintfException:
            # A reference that doesn't parse gives us nothing to compare.
            return
        if not ref_specs:
            return
        for report in self.checkPrintf(ref_specs, l10n_text):
            yield report

    def check_plural(self, refValue, l10nValue):
        '''Check for the stringbundle plurals logic.
        The common variable pattern is #1.

        Warns if the number of `;`-separated forms differs from what
        the locale's plural rule expects, warns if the localization
        drops a `#N` variable of the reference, and reports an error
        if it uses one the reference doesn't have.
        '''
        def numbered_variables(text):
            return set(
                int(hit.group(1))
                for hit in re.finditer('#([0-9]+)', text)
            )

        known_plurals = plurals.get_plural(self.locale)
        if known_plurals:
            expected_forms = len(known_plurals)
            found_forms = l10nValue.count(';') + 1
            if expected_forms != found_forms:
                yield (
                    'warning', 0,
                    'expecting {} plurals, found {}'.format(
                        expected_forms,
                        found_forms
                    ),
                    'plural'
                )
        ref_vars = numbered_variables(refValue)
        if not ref_vars:
            return
        l10n_vars = numbered_variables(l10nValue)
        if ref_vars - l10n_vars:
            yield ('warning', 0, 'not all variables used in l10n',
                   'plural')
        elif l10n_vars - ref_vars:
            yield ('error', 0, 'unreplaced variables in l10n',
                   'plural')

    def checkPrintf(self, refSpecs, l10nValue):
        '''Compare the printf specs of a localized value to refSpecs.

        Generator. A localized value that fails to parse yields a single
        error at the offending offset. Otherwise the spec lists are
        diffed: missing trailing arguments are a warning, any other
        difference is gathered into one error.
        '''
        try:
            l10nSpecs = self.getPrintfSpecs(l10nValue)
        except PrintfException as e:
            yield ('error', e.pos, e.msg, 'printf')
            return
        if refSpecs == l10nSpecs:
            return
        matcher = SequenceMatcher(a=refSpecs, b=l10nSpecs)
        errors = []
        trailing = None
        for tag, ref_lo, ref_hi, l10n_lo, l10n_hi in matcher.get_opcodes():
            if tag == 'delete':
                # missing argument in l10n
                if ref_hi == len(refSpecs):
                    # trailing specs missing, that's just a warning
                    trailing = ', '.join(
                        'trailing argument %d `%s` missing' %
                        (idx+1, refSpecs[idx])
                        for idx in range(ref_lo, ref_hi)
                    )
                else:
                    errors.extend(
                        'argument %d `%s` missing' %
                        (idx+1, refSpecs[idx])
                        for idx in range(ref_lo, ref_hi)
                    )
            elif tag == 'insert':
                # obsolete argument in l10n
                errors.extend(
                    'argument %d `%s` obsolete' %
                    (idx+1, l10nSpecs[idx])
                    for idx in range(l10n_lo, l10n_hi)
                )
            elif tag == 'replace':
                errors.extend(
                    'argument %d `%s` should be `%s`' %
                    (l10n_idx+1, l10nSpecs[l10n_idx], refSpecs[ref_idx])
                    for ref_idx, l10n_idx in zip(
                        range(ref_lo, ref_hi),
                        range(l10n_lo, l10n_hi)
                    )
                )
        if errors:
            yield ('error', 0, ', '.join(errors), 'printf')
        if trailing is not None:
            yield ('warning', 0, trailing, 'printf')

    def getPrintfSpecs(self, val):
        '''Return the list of conversion characters used in val.

        For ordered arguments (`%1$S`), the list is indexed by argument
        position. Escaped percent signs (`%%`) are skipped.

        Raises PrintfException for a lone `%`, for a mix of ordered and
        non-ordered arguments, and for gaps in ordered arguments.
        '''
        ordered = False
        specs = []
        for hit in self.printf.finditer(val):
            good = hit.group("good")
            if good is None:
                # found just a '%', signal an error
                raise PrintfException('Found single %', hit.start())
            if good == '%':
                # escaped %
                continue
            number = hit.group('number')
            numbered = number is not None
            if ((ordered and not numbered) or
                    (not ordered and specs and numbered)):
                # mixed style, numbered and not
                raise PrintfException('Mixed ordered and non-ordered args',
                                      hit.start())
            ordered = numbered
            spec = hit.group('spec')
            if not ordered:
                specs.append(spec)
                continue
            slot = int(number) - 1
            if slot < len(specs):
                specs[slot] = spec
            else:
                # pad specs up to the slot, then fill the slot itself
                specs.extend([None] * (slot - len(specs)))
                specs.append(spec)
        # check for missing args
        if ordered and not all(specs):
            raise PrintfException('Ordered argument missing', 0)
        return specs