summaryrefslogtreecommitdiffstats
path: root/third_party/python/compare-locales/compare_locales/checks
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/compare-locales/compare_locales/checks')
-rw-r--r--third_party/python/compare-locales/compare_locales/checks/__init__.py30
-rw-r--r--third_party/python/compare-locales/compare_locales/checks/android.py253
-rw-r--r--third_party/python/compare-locales/compare_locales/checks/base.py127
-rw-r--r--third_party/python/compare-locales/compare_locales/checks/dtd.py246
-rw-r--r--third_party/python/compare-locales/compare_locales/checks/fluent.py356
-rw-r--r--third_party/python/compare-locales/compare_locales/checks/properties.py173
6 files changed, 1185 insertions, 0 deletions
diff --git a/third_party/python/compare-locales/compare_locales/checks/__init__.py b/third_party/python/compare-locales/compare_locales/checks/__init__.py
new file mode 100644
index 0000000000..0c81a4b715
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/checks/__init__.py
@@ -0,0 +1,30 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+from .base import Checker, EntityPos
+from .android import AndroidChecker
+from .dtd import DTDChecker
+from .fluent import FluentChecker
+from .properties import PropertiesChecker
+
+
+__all__ = [
+ 'Checker', 'EntityPos',
+ 'AndroidChecker', 'DTDChecker', 'FluentChecker', 'PropertiesChecker',
+]
+
+
+def getChecker(file, extra_tests=None):
+ if PropertiesChecker.use(file):
+ return PropertiesChecker(extra_tests, locale=file.locale)
+ if DTDChecker.use(file):
+ return DTDChecker(extra_tests, locale=file.locale)
+ if FluentChecker.use(file):
+ return FluentChecker(extra_tests, locale=file.locale)
+ if AndroidChecker.use(file):
+ return AndroidChecker(extra_tests, locale=file.locale)
+ return Checker(extra_tests, locale=file.locale)
diff --git a/third_party/python/compare-locales/compare_locales/checks/android.py b/third_party/python/compare-locales/compare_locales/checks/android.py
new file mode 100644
index 0000000000..9791c49a4f
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/checks/android.py
@@ -0,0 +1,253 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import re
+from xml.dom import minidom
+
+from .base import Checker
+from ..parser.android import textContent
+
+
+class AndroidChecker(Checker):
+ pattern = re.compile('(.*)?strings.*\\.xml$')
+
+ def check(self, refEnt, l10nEnt):
+ '''Given the reference and localized Entities, performs checks.
+
+ This is a generator yielding tuples of
+ - "warning" or "error", depending on what should be reported,
+ - tuple of line, column info for the error within the string
+ - description string to be shown in the report
+ '''
+ for encoding_trouble in super(
+ AndroidChecker, self
+ ).check(refEnt, l10nEnt):
+ yield encoding_trouble
+ refNode = refEnt.node
+ l10nNode = l10nEnt.node
+ # Apples and oranges, error out.
+ if refNode.nodeName != l10nNode.nodeName:
+ yield ("error", 0, "Incompatible resource types", "android")
+ return
+ # Once we start parsing more resource types, make sure to add checks
+ # for them.
+ if refNode.nodeName != "string":
+ yield ("warning", 0, "Unsupported resource type", "android")
+ return
+ for report_tuple in self.check_string([refNode], l10nEnt):
+ yield report_tuple
+
+ def check_string(self, refs, l10nEnt):
+ '''Check a single string literal against a list of references.
+
+ There should be multiple nodes given for <plurals> or <string-array>.
+ '''
+ l10n = l10nEnt.node
+ if self.not_translatable(l10n, *refs):
+ yield (
+ "error",
+ 0,
+ "strings must be translatable",
+ "android"
+ )
+ return
+ if self.no_at_string(l10n):
+ yield (
+ "error",
+ 0,
+ "strings must be translatable",
+ "android"
+ )
+ return
+ if self.no_at_string(*refs):
+ yield (
+ "warning",
+ 0,
+ "strings must be translatable",
+ "android"
+ )
+ if self.non_simple_data(l10n):
+ yield (
+ "error",
+ 0,
+ "Only plain text allowed, "
+ "or one CDATA surrounded by whitespace",
+ "android"
+ )
+ return
+ for report_tuple in check_apostrophes(l10nEnt.val):
+ yield report_tuple
+
+ params, errors = get_params(refs)
+ for error, pos in errors:
+ yield (
+ "warning",
+ pos,
+ error,
+ "android"
+ )
+ if params:
+ for report_tuple in check_params(params, l10nEnt.val):
+ yield report_tuple
+
+ def not_translatable(self, *nodes):
+ return any(
+ node.hasAttribute("translatable")
+ and node.getAttribute("translatable") == "false"
+ for node in nodes
+ )
+
+ def no_at_string(self, *ref_nodes):
+ '''Android allows to reference other strings by using
+ @string/identifier
+ instead of the actual value. Those references don't belong into
+ a localizable file, warn on that.
+ '''
+ return any(
+ textContent(node).startswith('@string/')
+ for node in ref_nodes
+ )
+
+ def non_simple_data(self, node):
+ '''Only allow single text nodes, or, a single CDATA node
+ surrounded by whitespace.
+ '''
+ cdata = [
+ child
+ for child in node.childNodes
+ if child.nodeType == minidom.Node.CDATA_SECTION_NODE
+ ]
+ if len(cdata) == 0:
+ if node.childNodes.length == 0:
+ # empty translation is OK
+ return False
+ if node.childNodes.length != 1:
+ return True
+ return node.childNodes[0].nodeType != minidom.Node.TEXT_NODE
+ if len(cdata) > 1:
+ return True
+ for child in node.childNodes:
+ if child == cdata[0]:
+ continue
+ if child.nodeType != minidom.Node.TEXT_NODE:
+ return True
+ if child.data.strip() != "":
+ return True
+ return False
+
+
+silencer = re.compile(r'\\.|""')
+
+
+def check_apostrophes(string):
+ '''Check Android logic for quotes and apostrophes.
+
+ If you have an apostrophe (') in your string, you must either escape it
+ with a backslash (\') or enclose the string in double-quotes (").
+
+ Unescaped quotes are not visually shown on Android, but they're
+ also harmless, so we're not checking for quotes. We might do once we're
+ better at checking for inline XML, which is full of quotes.
+ Pairing quotes as in '""' is bad, though, so report errors for that.
+ Mostly, because it's hard to tell if a string is consider quoted or not
+ by Android in the end.
+
+ https://developer.android.com/guide/topics/resources/string-resource#escaping_quotes
+ '''
+ for m in re.finditer('""', string):
+ yield (
+ "error",
+ m.start(),
+ "Double straight quotes not allowed",
+ "android"
+ )
+ string = silencer.sub(" ", string)
+
+ is_quoted = string.startswith('"') and string.endswith('"')
+ if not is_quoted:
+ # apostrophes need to be escaped
+ for m in re.finditer("'", string):
+ yield (
+ "error",
+ m.start(),
+ "Apostrophe must be escaped",
+ "android"
+ )
+
+
+def get_params(refs):
+ '''Get printf parameters and internal errors.
+
+ Returns a sparse map of positions to formatter, and a list
+ of errors. Errors covered so far are mismatching formatters.
+ '''
+ params = {}
+ errors = []
+ next_implicit = 1
+ for ref in refs:
+ if isinstance(ref, minidom.Node):
+ ref = textContent(ref)
+ for m in re.finditer(r'%(?P<order>[1-9]\$)?(?P<format>[sSd])', ref):
+ order = m.group('order')
+ if order:
+ order = int(order[0])
+ else:
+ order = next_implicit
+ next_implicit += 1
+ fmt = m.group('format')
+ if order not in params:
+ params[order] = fmt
+ else:
+ # check for consistency errors
+ if params[order] == fmt:
+ continue
+ msg = "Conflicting formatting, %{order}${f1} vs %{order}${f2}"
+ errors.append((
+ msg.format(order=order, f1=fmt, f2=params[order]),
+ m.start()
+ ))
+ return params, errors
+
+
+def check_params(params, string):
+ '''Compare the printf parameters in the given string to the reference
+ parameters.
+
+ Also yields errors that are internal to the parameters inside string,
+ as found by `get_params`.
+ '''
+ lparams, errors = get_params([string])
+ for error, pos in errors:
+ yield (
+ "error",
+ pos,
+ error,
+ "android"
+ )
+ # Compare reference for each localized parameter.
+ # If there's no reference found, error, as an out-of-bounds
+ # parameter crashes.
+ # This assumes that all parameters are actually used in the reference,
+ # which should be OK.
+ # If there's a mismatch in the formatter, error.
+ for order in sorted(lparams):
+ if order not in params:
+ yield (
+ "error",
+ 0,
+ "Formatter %{}${} not found in reference".format(
+ order, lparams[order]
+ ),
+ "android"
+ )
+ elif params[order] != lparams[order]:
+ yield (
+ "error",
+ 0,
+ "Mismatching formatter",
+ "android"
+ )
diff --git a/third_party/python/compare-locales/compare_locales/checks/base.py b/third_party/python/compare-locales/compare_locales/checks/base.py
new file mode 100644
index 0000000000..3b04caa7a9
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/checks/base.py
@@ -0,0 +1,127 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import re
+import six
+
+
+class EntityPos(int):
+ pass
+
+
+mochibake = re.compile('\ufffd')
+
+
+class Checker(object):
+ '''Abstract class to implement checks per file type.
+ '''
+ pattern = None
+ # if a check uses all reference entities, set this to True
+ needs_reference = False
+
+ @classmethod
+ def use(cls, file):
+ return cls.pattern.match(file.file)
+
+ def __init__(self, extra_tests, locale=None):
+ self.extra_tests = extra_tests
+ self.locale = locale
+ self.reference = None
+
+ def check(self, refEnt, l10nEnt):
+ '''Given the reference and localized Entities, performs checks.
+
+ This is a generator yielding tuples of
+ - "warning" or "error", depending on what should be reported,
+ - tuple of line, column info for the error within the string
+ - description string to be shown in the report
+
+ By default, check for possible encoding errors.
+ '''
+ for m in mochibake.finditer(l10nEnt.all):
+ yield (
+ "warning",
+ EntityPos(m.start()),
+ "\ufffd in: {}".format(l10nEnt.key),
+ "encodings"
+ )
+
+ def set_reference(self, reference):
+ '''Set the reference entities.
+ Only do this if self.needs_reference is True.
+ '''
+ self.reference = reference
+
+
+class CSSCheckMixin(object):
+ def maybe_style(self, ref_value, l10n_value):
+ ref_map, _ = self.parse_css_spec(ref_value)
+ if not ref_map:
+ return
+ l10n_map, errors = self.parse_css_spec(l10n_value)
+ for t in self.check_style(ref_map, l10n_map, errors):
+ yield t
+
+ def check_style(self, ref_map, l10n_map, errors):
+ if not l10n_map:
+ yield ('error', 0, 'reference is a CSS spec', 'css')
+ return
+ if errors:
+ yield ('error', 0, 'reference is a CSS spec', 'css')
+ return
+ msgs = []
+ for prop, unit in l10n_map.items():
+ if prop not in ref_map:
+ msgs.insert(0, '%s only in l10n' % prop)
+ continue
+ else:
+ ref_unit = ref_map.pop(prop)
+ if unit != ref_unit:
+ msgs.append("units for %s don't match "
+ "(%s != %s)" % (prop, unit, ref_unit))
+ for prop in six.iterkeys(ref_map):
+ msgs.insert(0, '%s only in reference' % prop)
+ if msgs:
+ yield ('warning', 0, ', '.join(msgs), 'css')
+
+ def parse_css_spec(self, val):
+ if not hasattr(self, '_css_spec'):
+ self._css_spec = re.compile(
+ r'(?:'
+ r'(?P<prop>(?:min\-|max\-)?(?:width|height))'
+ r'[ \t\r\n]*:[ \t\r\n]*'
+ r'(?P<length>[0-9]+|[0-9]*\.[0-9]+)'
+ r'(?P<unit>ch|em|ex|rem|px|cm|mm|in|pc|pt)'
+ r')'
+ r'|\Z'
+ )
+ self._css_sep = re.compile(r'[ \t\r\n]*(?P<semi>;)?[ \t\r\n]*$')
+ refMap = errors = None
+ end = 0
+ for m in self._css_spec.finditer(val):
+ if end == 0 and m.start() == m.end():
+ # no CSS spec found, just immediately end of string
+ return None, None
+ if m.start() > end:
+ split = self._css_sep.match(val, end, m.start())
+ if split is None:
+ errors = errors or []
+ errors.append({
+ 'pos': end,
+ 'code': 'css-bad-content',
+ })
+ elif end > 0 and split.group('semi') is None:
+ errors = errors or []
+ errors.append({
+ 'pos': end,
+ 'code': 'css-missing-semicolon',
+ })
+ if m.group('prop'):
+ refMap = refMap or {}
+ refMap[m.group('prop')] = m.group('unit')
+ end = m.end()
+ return refMap, errors
diff --git a/third_party/python/compare-locales/compare_locales/checks/dtd.py b/third_party/python/compare-locales/compare_locales/checks/dtd.py
new file mode 100644
index 0000000000..37d3c7846d
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/checks/dtd.py
@@ -0,0 +1,246 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+import re
+from xml import sax
+import six
+
+from compare_locales.parser import DTDParser
+from .base import Checker, CSSCheckMixin
+
+
+class DTDChecker(Checker, CSSCheckMixin):
+ """Tests to run on DTD files.
+
+ Uses xml.sax for the heavy lifting of xml parsing.
+
+ The code tries to parse until it doesn't find any unresolved entities
+ anymore. If it finds one, it tries to grab the key, and adds an empty
+ <!ENTITY key ""> definition to the header.
+
+ Also checks for some CSS and number heuristics in the values.
+ """
+ pattern = re.compile(r'.*\.dtd$')
+ needs_reference = True # to cast a wider net for known entity references
+
+ eref = re.compile('&(%s);' % DTDParser.Name)
+ tmpl = b'''<!DOCTYPE elem [%s]>
+<elem>%s</elem>
+'''
+ xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot'))
+
+ def __init__(self, extra_tests, locale=None):
+ super(DTDChecker, self).__init__(extra_tests, locale=locale)
+ self.processContent = False
+ if self.extra_tests is not None and 'android-dtd' in self.extra_tests:
+ self.processContent = True
+ self.__known_entities = None
+
+ def known_entities(self, refValue):
+ if self.__known_entities is None and self.reference is not None:
+ self.__known_entities = set()
+ for ent in self.reference.values():
+ self.__known_entities.update(
+ self.entities_for_value(ent.raw_val))
+ return self.__known_entities if self.__known_entities is not None \
+ else self.entities_for_value(refValue)
+
+ def entities_for_value(self, value):
+ reflist = set(m.group(1)
+ for m in self.eref.finditer(value))
+ reflist -= self.xmllist
+ return reflist
+
+ # Setup for XML parser, with default and text-only content handler
+ class TextContent(sax.handler.ContentHandler):
+ textcontent = ''
+
+ def characters(self, content):
+ self.textcontent += content
+
+ defaulthandler = sax.handler.ContentHandler()
+ texthandler = TextContent()
+
+ numPattern = r'([0-9]+|[0-9]*\.[0-9]+)'
+ num = re.compile('^%s$' % numPattern)
+ lengthPattern = '%s(em|px|ch|cm|in)' % numPattern
+ length = re.compile('^%s$' % lengthPattern)
+
+ def check(self, refEnt, l10nEnt):
+ """Try to parse the refvalue inside a dummy element, and keep
+ track of entities that we need to define to make that work.
+
+ Return a checker that offers just those entities.
+ """
+ for encoding_trouble in super(
+ DTDChecker, self
+ ).check(refEnt, l10nEnt):
+ yield encoding_trouble
+ refValue, l10nValue = refEnt.raw_val, l10nEnt.raw_val
+ # find entities the refValue references,
+ # reusing markup from DTDParser.
+ reflist = self.known_entities(refValue)
+ inContext = self.entities_for_value(refValue)
+ entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
+ parser = sax.make_parser()
+ parser.setFeature(sax.handler.feature_external_ges, False)
+
+ parser.setContentHandler(self.defaulthandler)
+ try:
+ parser.parse(
+ six.BytesIO(self.tmpl %
+ (entities.encode('utf-8'),
+ refValue.encode('utf-8'))))
+ # also catch stray %
+ parser.parse(
+ six.BytesIO(self.tmpl %
+ ((refEnt.all + entities).encode('utf-8'),
+ b'&%s;' % refEnt.key.encode('utf-8'))))
+ except sax.SAXParseException as e:
+ e # noqa
+ yield ('warning',
+ (0, 0),
+ "can't parse en-US value", 'xmlparse')
+
+ # find entities the l10nValue references,
+ # reusing markup from DTDParser.
+ l10nlist = self.entities_for_value(l10nValue)
+ missing = sorted(l10nlist - reflist)
+ _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
+ if self.processContent:
+ self.texthandler.textcontent = ''
+ parser.setContentHandler(self.texthandler)
+ try:
+ parser.parse(six.BytesIO(self.tmpl % (_entities.encode('utf-8'),
+ l10nValue.encode('utf-8'))))
+ # also catch stray %
+ # if this fails, we need to substract the entity definition
+ parser.setContentHandler(self.defaulthandler)
+ parser.parse(
+ six.BytesIO(self.tmpl %
+ ((l10nEnt.all + _entities).encode('utf-8'),
+ b'&%s;' % l10nEnt.key.encode('utf-8'))))
+ except sax.SAXParseException as e:
+ # xml parse error, yield error
+ # sometimes, the error is reported on our fake closing
+ # element, make that the end of the last line
+ lnr = e.getLineNumber() - 1
+ lines = l10nValue.splitlines()
+ if lnr > len(lines):
+ lnr = len(lines)
+ col = len(lines[lnr-1])
+ else:
+ col = e.getColumnNumber()
+ if lnr == 1:
+ # first line starts with <elem>, substract
+ col -= len("<elem>")
+ elif lnr == 0:
+ col -= len("<!DOCTYPE elem [") # first line is DOCTYPE
+ yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse')
+
+ warntmpl = u'Referencing unknown entity `%s`'
+ if reflist:
+ if inContext:
+ elsewhere = reflist - inContext
+ warntmpl += ' (%s used in context' % \
+ ', '.join(sorted(inContext))
+ if elsewhere:
+ warntmpl += ', %s known)' % ', '.join(sorted(elsewhere))
+ else:
+ warntmpl += ')'
+ else:
+ warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
+ for key in missing:
+ yield ('warning', (0, 0), warntmpl % key,
+ 'xmlparse')
+ if inContext and l10nlist and l10nlist - inContext - set(missing):
+ mismatch = sorted(l10nlist - inContext - set(missing))
+ for key in mismatch:
+ yield ('warning', (0, 0),
+ 'Entity %s referenced, but %s used in context' % (
+ key,
+ ', '.join(sorted(inContext))
+ ), 'xmlparse')
+
+ # Number check
+ if self.num.match(refValue) and not self.num.match(l10nValue):
+ yield ('warning', 0, 'reference is a number', 'number')
+ # CSS checks
+ # just a length, width="100em"
+ if self.length.match(refValue) and not self.length.match(l10nValue):
+ yield ('error', 0, 'reference is a CSS length', 'css')
+ # Check for actual CSS style attribute values
+ for t in self.maybe_style(refValue, l10nValue):
+ yield t
+
+ if self.extra_tests is not None and 'android-dtd' in self.extra_tests:
+ for t in self.processAndroidContent(self.texthandler.textcontent):
+ yield t
+
+ quoted = re.compile("(?P<q>[\"']).*(?P=q)$")
+
+ def unicode_escape(self, str):
+ """Helper method to try to decode all unicode escapes in a string.
+
+ This code uses the standard python decode for unicode-escape, but
+ that's somewhat tricky, as its input needs to be ascii. To get to
+ ascii, the unicode string gets converted to ascii with
+ backslashreplace, i.e., all non-ascii unicode chars get unicode
+ escaped. And then we try to roll all of that back.
+ Now, when that hits an error, that's from the original string, and we
+ need to search for the actual error position in the original string,
+ as the backslashreplace code changes string positions quite badly.
+ See also the last check in TestAndroid.test_android_dtd, with a
+ lengthy chinese string.
+ """
+ val = str.encode('ascii', 'backslashreplace')
+ try:
+ val.decode('unicode-escape')
+ except UnicodeDecodeError as e:
+ args = list(e.args)
+ badstring = args[1][args[2]:args[3]]
+ i = len(args[1][:args[2]].decode('unicode-escape'))
+ args[2] = i
+ args[3] = i + len(badstring)
+ raise UnicodeDecodeError(*args)
+
+ def processAndroidContent(self, val):
+ """Check for the string values that Android puts into an XML container.
+
+ http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling # noqa
+
+ Check for unicode escapes and unescaped quotes and apostrophes,
+ if string's not quoted.
+ """
+ # first, try to decode unicode escapes
+ try:
+ self.unicode_escape(val)
+ except UnicodeDecodeError as e:
+ yield ('error', e.args[2], e.args[4], 'android')
+ # check for unescaped single or double quotes.
+ # first, see if the complete string is single or double quoted,
+ # that changes the rules
+ m = self.quoted.match(val)
+ if m:
+ q = m.group('q')
+ offset = 0
+ val = val[1:-1] # strip quotes
+ else:
+ q = "[\"']"
+ offset = -1
+ stray_quot = re.compile(r"[\\\\]*(%s)" % q)
+
+ for m in stray_quot.finditer(val):
+ if len(m.group(0)) % 2:
+ # found an unescaped single or double quote, which message?
+ if m.group(1) == '"':
+ msg = "Quotes in Android DTDs need escaping with \\\" "\
+ "or \\u0022, or put string in apostrophes."
+ else:
+ msg = "Apostrophes in Android DTDs need escaping with "\
+ "\\' or \\u0027, or use \u2019, or put string in "\
+ "quotes."
+ yield ('error', m.end(0)+offset, msg, 'android')
diff --git a/third_party/python/compare-locales/compare_locales/checks/fluent.py b/third_party/python/compare-locales/compare_locales/checks/fluent.py
new file mode 100644
index 0000000000..feb7242fb7
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/checks/fluent.py
@@ -0,0 +1,356 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+import re
+from collections import defaultdict
+
+from fluent.syntax import ast as ftl
+from fluent.syntax.serializer import serialize_variant_key
+from fluent.syntax.visitor import Visitor
+
+from .base import Checker, CSSCheckMixin
+from compare_locales import plurals
+
+
+MSGS = {
+ 'missing-msg-ref': 'Missing message reference: {ref}',
+ 'missing-term-ref': 'Missing term reference: {ref}',
+ 'obsolete-msg-ref': 'Obsolete message reference: {ref}',
+ 'obsolete-term-ref': 'Obsolete term reference: {ref}',
+ 'duplicate-attribute': 'Attribute "{name}" is duplicated',
+ 'missing-value': 'Missing value',
+ 'obsolete-value': 'Obsolete value',
+ 'missing-attribute': 'Missing attribute: {name}',
+ 'obsolete-attribute': 'Obsolete attribute: {name}',
+ 'duplicate-variant': 'Variant key "{name}" is duplicated',
+ 'missing-plural': 'Plural categories missing: {categories}',
+ 'plain-message': '{message}',
+}
+
+
+def pattern_variants(pattern):
+ """Get variants of plain text of a pattern.
+
+ For now, just return simple text patterns.
+ This can be improved to allow for SelectExpressions
+ of simple text patterns, or even nested expressions, and Literals.
+ Variants with Variable-, Message-, or TermReferences should be ignored.
+ """
+ elements = pattern.elements
+ if len(elements) == 1:
+ if isinstance(elements[0], ftl.TextElement):
+ return [elements[0].value]
+ return []
+
+
+class ReferenceMessageVisitor(Visitor, CSSCheckMixin):
+ def __init__(self):
+ # References to Messages, their Attributes, and Terms
+ # Store reference name and type
+ self.entry_refs = defaultdict(dict)
+ # The currently active references
+ self.refs = {}
+ # Start with the Entry value (associated with None)
+ self.entry_refs[None] = self.refs
+ # If we're a messsage, store if there was a value
+ self.message_has_value = False
+ # Map attribute names to positions
+ self.attribute_positions = {}
+ # Map of CSS style attribute properties and units
+ self.css_styles = None
+ self.css_errors = None
+
+ def generic_visit(self, node):
+ if isinstance(
+ node,
+ (ftl.Span, ftl.Annotation, ftl.BaseComment)
+ ):
+ return
+ super(ReferenceMessageVisitor, self).generic_visit(node)
+
+ def visit_Message(self, node):
+ if node.value is not None:
+ self.message_has_value = True
+ super(ReferenceMessageVisitor, self).generic_visit(node)
+
+ def visit_Attribute(self, node):
+ self.attribute_positions[node.id.name] = node.span.start
+ old_refs = self.refs
+ self.refs = self.entry_refs[node.id.name]
+ super(ReferenceMessageVisitor, self).generic_visit(node)
+ self.refs = old_refs
+ if node.id.name != 'style':
+ return
+ text_values = pattern_variants(node.value)
+ if not text_values:
+ self.css_styles = 'skip'
+ return
+ # right now, there's just one possible text value
+ self.css_styles, self.css_errors = self.parse_css_spec(text_values[0])
+
+ def visit_SelectExpression(self, node):
+ # optimize select expressions to only go through the variants
+ self.visit(node.variants)
+
+ def visit_MessageReference(self, node):
+ ref = node.id.name
+ if node.attribute:
+ ref += '.' + node.attribute.name
+ self.refs[ref] = 'msg-ref'
+
+ def visit_TermReference(self, node):
+ # only collect term references, but not attributes of terms
+ if node.attribute:
+ return
+ self.refs['-' + node.id.name] = 'term-ref'
+
+
+class GenericL10nChecks(object):
+ '''Helper Mixin for checks shared between Terms and Messages.'''
+ def check_duplicate_attributes(self, node):
+ warned = set()
+ for left in range(len(node.attributes) - 1):
+ if left in warned:
+ continue
+ left_attr = node.attributes[left]
+ warned_left = False
+ for right in range(left+1, len(node.attributes)):
+ right_attr = node.attributes[right]
+ if left_attr.id.name == right_attr.id.name:
+ if not warned_left:
+ warned_left = True
+ self.messages.append(
+ (
+ 'warning', left_attr.span.start,
+ MSGS['duplicate-attribute'].format(
+ name=left_attr.id.name
+ )
+ )
+ )
+ warned.add(right)
+ self.messages.append(
+ (
+ 'warning', right_attr.span.start,
+ MSGS['duplicate-attribute'].format(
+ name=left_attr.id.name
+ )
+ )
+ )
+
+ def check_variants(self, variants):
+ # Check for duplicate variants
+ warned = set()
+ for left in range(len(variants) - 1):
+ if left in warned:
+ continue
+ left_key = variants[left].key
+ key_string = None
+ for right in range(left+1, len(variants)):
+ if left_key.equals(variants[right].key):
+ if key_string is None:
+ key_string = serialize_variant_key(left_key)
+ self.messages.append(
+ (
+ 'warning', left_key.span.start,
+ MSGS['duplicate-variant'].format(
+ name=key_string
+ )
+ )
+ )
+ warned.add(right)
+ self.messages.append(
+ (
+ 'warning', variants[right].key.span.start,
+ MSGS['duplicate-variant'].format(
+ name=key_string
+ )
+ )
+ )
+ # Check for plural categories
+ known_plurals = plurals.get_plural(self.locale)
+ if known_plurals:
+ known_plurals = set(known_plurals)
+ # Ask for known plurals, but check for plurals w/out `other`.
+ # `other` is used for all kinds of things.
+ check_plurals = known_plurals.copy()
+ check_plurals.discard('other')
+ given_plurals = set(serialize_variant_key(v.key) for v in variants)
+ if given_plurals & check_plurals:
+ missing_plurals = sorted(known_plurals - given_plurals)
+ if missing_plurals:
+ self.messages.append(
+ (
+ 'warning', variants[0].key.span.start,
+ MSGS['missing-plural'].format(
+ categories=', '.join(missing_plurals)
+ )
+ )
+ )
+
+
+class L10nMessageVisitor(GenericL10nChecks, ReferenceMessageVisitor):
+ def __init__(self, locale, reference):
+ super(L10nMessageVisitor, self).__init__()
+ self.locale = locale
+ # Overload refs to map to sets, just store what we found
+ # References to Messages, their Attributes, and Terms
+ # Store reference name and type
+ self.entry_refs = defaultdict(set)
+ # The currently active references
+ self.refs = set()
+ # Start with the Entry value (associated with None)
+ self.entry_refs[None] = self.refs
+ self.reference = reference
+ self.reference_refs = reference.entry_refs[None]
+ self.messages = []
+
+ def visit_Message(self, node):
+ self.check_duplicate_attributes(node)
+ super(L10nMessageVisitor, self).visit_Message(node)
+ if self.message_has_value and not self.reference.message_has_value:
+ self.messages.append(
+ ('error', node.value.span.start, MSGS['obsolete-value'])
+ )
+ if not self.message_has_value and self.reference.message_has_value:
+ self.messages.append(
+ ('error', 0, MSGS['missing-value'])
+ )
+ ref_attrs = set(self.reference.attribute_positions)
+ l10n_attrs = set(self.attribute_positions)
+ for missing_attr in ref_attrs - l10n_attrs:
+ self.messages.append(
+ (
+ 'error', 0,
+ MSGS['missing-attribute'].format(name=missing_attr)
+ )
+ )
+ for obs_attr in l10n_attrs - ref_attrs:
+ self.messages.append(
+ (
+ 'error', self.attribute_positions[obs_attr],
+ MSGS['obsolete-attribute'].format(name=obs_attr)
+ )
+ )
+
+ def visit_Term(self, node):
+ raise RuntimeError("Should not use L10nMessageVisitor for Terms")
+
+ def visit_Attribute(self, node):
+ old_reference_refs = self.reference_refs
+ self.reference_refs = self.reference.entry_refs[node.id.name]
+ super(L10nMessageVisitor, self).visit_Attribute(node)
+ self.reference_refs = old_reference_refs
+ if node.id.name != 'style' or self.css_styles == 'skip':
+ return
+ ref_styles = self.reference.css_styles
+ if ref_styles in ('skip', None):
+ # Reference is complex, l10n isn't.
+ # Let's still validate the css spec.
+ ref_styles = {}
+ for cat, msg, pos, _ in self.check_style(
+ ref_styles,
+ self.css_styles,
+ self.css_errors
+ ):
+ self.messages.append((cat, msg, pos))
+
+ def visit_SelectExpression(self, node):
+ super(L10nMessageVisitor, self).visit_SelectExpression(node)
+ self.check_variants(node.variants)
+
+ def visit_MessageReference(self, node):
+ ref = node.id.name
+ if node.attribute:
+ ref += '.' + node.attribute.name
+ self.refs.add(ref)
+ self.check_obsolete_ref(node, ref, 'msg-ref')
+
+ def visit_TermReference(self, node):
+ if node.attribute:
+ return
+ ref = '-' + node.id.name
+ self.refs.add(ref)
+ self.check_obsolete_ref(node, ref, 'term-ref')
+
+ def check_obsolete_ref(self, node, ref, ref_type):
+ if ref not in self.reference_refs:
+ self.messages.append(
+ (
+ 'warning', node.span.start,
+ MSGS['obsolete-' + ref_type].format(ref=ref),
+ )
+ )
+
+
+class TermVisitor(GenericL10nChecks, Visitor):
+ def __init__(self, locale):
+ super(TermVisitor, self).__init__()
+ self.locale = locale
+ self.messages = []
+
+ def generic_visit(self, node):
+ if isinstance(
+ node,
+ (ftl.Span, ftl.Annotation, ftl.BaseComment)
+ ):
+ return
+ super(TermVisitor, self).generic_visit(node)
+
+ def visit_Message(self, node):
+ raise RuntimeError("Should not use TermVisitor for Messages")
+
+ def visit_Term(self, node):
+ self.check_duplicate_attributes(node)
+ super(TermVisitor, self).generic_visit(node)
+
+ def visit_SelectExpression(self, node):
+ super(TermVisitor, self).generic_visit(node)
+ self.check_variants(node.variants)
+
+
+class FluentChecker(Checker):
+ '''Tests to run on Fluent (FTL) files.
+ '''
+ pattern = re.compile(r'.*\.ftl')
+
+ def check_message(self, ref_entry, l10n_entry):
+ '''Run checks on localized messages against reference message.'''
+ ref_data = ReferenceMessageVisitor()
+ ref_data.visit(ref_entry)
+ l10n_data = L10nMessageVisitor(self.locale, ref_data)
+ l10n_data.visit(l10n_entry)
+
+ messages = l10n_data.messages
+ for attr_or_val, refs in ref_data.entry_refs.items():
+ for ref, ref_type in refs.items():
+ if ref not in l10n_data.entry_refs[attr_or_val]:
+ msg = MSGS['missing-' + ref_type].format(ref=ref)
+ messages.append(('warning', 0, msg))
+ return messages
+
+ def check_term(self, l10n_entry):
+ '''Check localized terms.'''
+ l10n_data = TermVisitor(self.locale)
+ l10n_data.visit(l10n_entry)
+ return l10n_data.messages
+
+ def check(self, refEnt, l10nEnt):
+ for encoding_trouble in super(
+ FluentChecker, self
+ ).check(refEnt, l10nEnt):
+ yield encoding_trouble
+ l10n_entry = l10nEnt.entry
+ if isinstance(l10n_entry, ftl.Message):
+ ref_entry = refEnt.entry
+ messages = self.check_message(ref_entry, l10n_entry)
+ elif isinstance(l10n_entry, ftl.Term):
+ messages = self.check_term(l10n_entry)
+
+ messages.sort(key=lambda t: t[1])
+ for cat, pos, msg in messages:
+ if pos:
+ pos = pos - l10n_entry.span.start
+ yield (cat, pos, msg, 'fluent')
diff --git a/third_party/python/compare-locales/compare_locales/checks/properties.py b/third_party/python/compare-locales/compare_locales/checks/properties.py
new file mode 100644
index 0000000000..9ff2e4cdae
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/checks/properties.py
@@ -0,0 +1,173 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+import re
+from difflib import SequenceMatcher
+from six.moves import range
+from six.moves import zip
+
+from compare_locales.parser import PropertiesEntity
+from compare_locales import plurals
+from .base import Checker
+
+
+class PrintfException(Exception):
+ def __init__(self, msg, pos):
+ self.pos = pos
+ self.msg = msg
+
+
+class PropertiesChecker(Checker):
+ '''Tests to run on .properties files.
+ '''
+ pattern = re.compile(r'.*\.properties$')
+ printf = re.compile(r'%(?P<good>%|'
+ r'(?:(?P<number>[1-9][0-9]*)\$)?'
+ r'(?P<width>\*|[0-9]+)?'
+ r'(?P<prec>\.(?:\*|[0-9]+)?)?'
+ r'(?P<spec>[duxXosScpfg]))?')
+
+ def check(self, refEnt, l10nEnt):
+ '''Test for the different variable formats.
+ '''
+ for encoding_trouble in super(
+ PropertiesChecker, self
+ ).check(refEnt, l10nEnt):
+ yield encoding_trouble
+ refValue, l10nValue = refEnt.val, l10nEnt.val
+ refSpecs = None
+ # check for PluralForm.jsm stuff, should have the docs in the
+ # comment
+ # That also includes intl.properties' pluralRule, so exclude
+ # entities with that key and values with just numbers
+ if (refEnt.pre_comment
+ and 'Localization_and_Plurals' in refEnt.pre_comment.all
+ and refEnt.key != 'pluralRule'
+ and not re.match(r'\d+$', refValue)):
+ for msg_tuple in self.check_plural(refValue, l10nValue):
+ yield msg_tuple
+ return
+ # check for lost escapes
+ raw_val = l10nEnt.raw_val
+ for m in PropertiesEntity.escape.finditer(raw_val):
+ if m.group('single') and \
+ m.group('single') not in PropertiesEntity.known_escapes:
+ yield ('warning', m.start(),
+ 'unknown escape sequence, \\' + m.group('single'),
+ 'escape')
+ try:
+ refSpecs = self.getPrintfSpecs(refValue)
+ except PrintfException:
+ refSpecs = []
+ if refSpecs:
+ for t in self.checkPrintf(refSpecs, l10nValue):
+ yield t
+ return
+
+ def check_plural(self, refValue, l10nValue):
+ '''Check for the stringbundle plurals logic.
+ The common variable pattern is #1.
+ '''
+ known_plurals = plurals.get_plural(self.locale)
+ if known_plurals:
+ expected_forms = len(known_plurals)
+ found_forms = l10nValue.count(';') + 1
+ msg = 'expecting {} plurals, found {}'.format(
+ expected_forms,
+ found_forms
+ )
+ if expected_forms > found_forms:
+ yield ('warning', 0, msg, 'plural')
+ if expected_forms < found_forms:
+ yield ('warning', 0, msg, 'plural')
+ pats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
+ refValue))
+ if len(pats) == 0:
+ return
+ lpats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
+ l10nValue))
+ if pats - lpats:
+ yield ('warning', 0, 'not all variables used in l10n',
+ 'plural')
+ return
+ if lpats - pats:
+ yield ('error', 0, 'unreplaced variables in l10n',
+ 'plural')
+
+ def checkPrintf(self, refSpecs, l10nValue):
+ try:
+ l10nSpecs = self.getPrintfSpecs(l10nValue)
+ except PrintfException as e:
+ yield ('error', e.pos, e.msg, 'printf')
+ return
+ if refSpecs != l10nSpecs:
+ sm = SequenceMatcher()
+ sm.set_seqs(refSpecs, l10nSpecs)
+ msgs = []
+ warn = None
+ for action, i1, i2, j1, j2 in sm.get_opcodes():
+ if action == 'equal':
+ continue
+ if action == 'delete':
+ # missing argument in l10n
+ if i2 == len(refSpecs):
+ # trailing specs missing, that's just a warning
+ warn = ', '.join('trailing argument %d `%s` missing' %
+ (i+1, refSpecs[i])
+ for i in range(i1, i2))
+ else:
+ for i in range(i1, i2):
+ msgs.append('argument %d `%s` missing' %
+ (i+1, refSpecs[i]))
+ continue
+ if action == 'insert':
+ # obsolete argument in l10n
+ for i in range(j1, j2):
+ msgs.append('argument %d `%s` obsolete' %
+ (i+1, l10nSpecs[i]))
+ continue
+ if action == 'replace':
+ for i, j in zip(range(i1, i2), range(j1, j2)):
+ msgs.append('argument %d `%s` should be `%s`' %
+ (j+1, l10nSpecs[j], refSpecs[i]))
+ if msgs:
+ yield ('error', 0, ', '.join(msgs), 'printf')
+ if warn is not None:
+ yield ('warning', 0, warn, 'printf')
+
+ def getPrintfSpecs(self, val):
+ hasNumber = False
+ specs = []
+ for m in self.printf.finditer(val):
+ if m.group("good") is None:
+ # found just a '%', signal an error
+ raise PrintfException('Found single %', m.start())
+ if m.group("good") == '%':
+ # escaped %
+ continue
+ if ((hasNumber and m.group('number') is None) or
+ (not hasNumber and specs and
+ m.group('number') is not None)):
+ # mixed style, numbered and not
+ raise PrintfException('Mixed ordered and non-ordered args',
+ m.start())
+ hasNumber = m.group('number') is not None
+ if hasNumber:
+ pos = int(m.group('number')) - 1
+ ls = len(specs)
+ if pos >= ls:
+ # pad specs
+ nones = pos - ls
+ specs[ls:pos] = nones*[None]
+ specs.append(m.group('spec'))
+ else:
+ specs[pos] = m.group('spec')
+ else:
+ specs.append(m.group('spec'))
+ # check for missing args
+ if hasNumber and not all(specs):
+ raise PrintfException('Ordered argument missing', 0)
+ return specs