Diffstat (limited to 'third_party/python/compare-locales')
94 files changed, 14974 insertions, 0 deletions
diff --git a/third_party/python/compare-locales/PKG-INFO b/third_party/python/compare-locales/PKG-INFO new file mode 100644 index 0000000000..5daa15c4a5 --- /dev/null +++ b/third_party/python/compare-locales/PKG-INFO @@ -0,0 +1,82 @@ +Metadata-Version: 2.1 +Name: compare-locales +Version: 8.1.0 +Summary: Lint Mozilla localizations +Home-page: UNKNOWN +Author: Axel Hecht +Author-email: axel@mozilla.com +License: MPL 2.0 +Description: [![Build Status](https://travis-ci.org/Pike/compare-locales.svg?branch=master)](https://travis-ci.org/Pike/compare-locales) + # compare-locales + Lint Mozilla localizations + + Finds + * missing strings + * obsolete strings + * errors on runtime errors without false positives + * warns on possible runtime errors + + It also includes `l10n-merge` functionality, which pads localizations with + missing English strings, and replaces entities with errors with English. + + If you want to check your original code for errors like duplicated messages, + use `moz-l10n-lint`, which is also part of this package. You can also use + this to check for conflicts between your strings and those already exposed + to l10n. + + # Configuration + + You configure `compare-locales` (and `moz-l10n-lint`) through a + [project configuration](https://moz-l10n-config.readthedocs.io/en/latest/fileformat.html) + file, `l10n.toml`. + + # Examples + + To check all locales in a project use + + ```bash + compare-locales l10n.toml . + ``` + + To check Firefox against a local check-out of l10n-central, use + + ```bash + compare-locales browser/locales/l10n.toml ../l10n-central + ``` + + If you just want to check particular locales, specify them as additional + commandline parameters. + + To lint your local work, use + + ```bash + moz-l10n-lint l10n.toml + ``` + + To check for conflicts against already existing strings: + + ```bash + moz-l10n-lint --reference-project ../android-l10n/mozilla-mobile/fenix l10n.toml + moz-l10n-lint --l10n-reference ../gecko-strings browser/locales/l10n.toml + ``` + + to check for a monolithic project like Fenix or a gecko project like Firefox, + resp. 
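For orientation only: the description above points at the project-configuration docs for the full `l10n.toml` format. A minimal config in that format might look like the sketch below; the `basepath`, locale codes, and glob patterns are illustrative assumptions, not taken from this change or from any file it adds.

```toml
# Illustrative l10n.toml sketch (adjust paths to the actual repository layout).
basepath = "."

locales = [
    "de",
    "it",
]

[[paths]]
    # English reference strings live here...
    reference = "en-US/**"
    # ...and each localization mirrors that layout under its locale code.
    l10n = "{locale}/**"
```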
+ +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Software Development :: Localization +Classifier: Topic :: Software Development :: Testing +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4 +Description-Content-Type: text/markdown diff --git a/third_party/python/compare-locales/README.md b/third_party/python/compare-locales/README.md new file mode 100644 index 0000000000..a050c7d3c8 --- /dev/null +++ b/third_party/python/compare-locales/README.md @@ -0,0 +1,56 @@ +[![Build Status](https://travis-ci.org/Pike/compare-locales.svg?branch=master)](https://travis-ci.org/Pike/compare-locales) +# compare-locales +Lint Mozilla localizations + +Finds +* missing strings +* obsolete strings +* errors on runtime errors without false positives +* warns on possible runtime errors + +It also includes `l10n-merge` functionality, which pads localizations with +missing English strings, and replaces entities with errors with English. + +If you want to check your original code for errors like duplicated messages, +use `moz-l10n-lint`, which is also part of this package. You can also use +this to check for conflicts between your strings and those already exposed +to l10n. + +# Configuration + +You configure `compare-locales` (and `moz-l10n-lint`) through a +[project configuration](https://moz-l10n-config.readthedocs.io/en/latest/fileformat.html) +file, `l10n.toml`. + +# Examples + +To check all locales in a project use + +```bash +compare-locales l10n.toml . +``` + +To check Firefox against a local check-out of l10n-central, use + +```bash +compare-locales browser/locales/l10n.toml ../l10n-central +``` + +If you just want to check particular locales, specify them as additional +commandline parameters. + +To lint your local work, use + +```bash +moz-l10n-lint l10n.toml +``` + +To check for conflicts against already existing strings: + +```bash +moz-l10n-lint --reference-project ../android-l10n/mozilla-mobile/fenix l10n.toml +moz-l10n-lint --l10n-reference ../gecko-strings browser/locales/l10n.toml +``` + +to check for a monolithic project like Fenix or a gecko project like Firefox, +resp. diff --git a/third_party/python/compare-locales/compare_locales/__init__.py b/third_party/python/compare-locales/compare_locales/__init__.py new file mode 100644 index 0000000000..3f323bbf7a --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/__init__.py @@ -0,0 +1 @@ +version = "8.1.0" diff --git a/third_party/python/compare-locales/compare_locales/checks/__init__.py b/third_party/python/compare-locales/compare_locales/checks/__init__.py new file mode 100644 index 0000000000..0c81a4b715 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/__init__.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +from .base import Checker, EntityPos +from .android import AndroidChecker +from .dtd import DTDChecker +from .fluent import FluentChecker +from .properties import PropertiesChecker + + +__all__ = [ + 'Checker', 'EntityPos', + 'AndroidChecker', 'DTDChecker', 'FluentChecker', 'PropertiesChecker', +] + + +def getChecker(file, extra_tests=None): + if PropertiesChecker.use(file): + return PropertiesChecker(extra_tests, locale=file.locale) + if DTDChecker.use(file): + return DTDChecker(extra_tests, locale=file.locale) + if FluentChecker.use(file): + return FluentChecker(extra_tests, locale=file.locale) + if AndroidChecker.use(file): + return AndroidChecker(extra_tests, locale=file.locale) + return Checker(extra_tests, locale=file.locale) diff --git a/third_party/python/compare-locales/compare_locales/checks/android.py b/third_party/python/compare-locales/compare_locales/checks/android.py new file mode 100644 index 0000000000..9791c49a4f --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/android.py @@ -0,0 +1,253 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +import re +from xml.dom import minidom + +from .base import Checker +from ..parser.android import textContent + + +class AndroidChecker(Checker): + pattern = re.compile('(.*)?strings.*\\.xml$') + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + ''' + for encoding_trouble in super( + AndroidChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + refNode = refEnt.node + l10nNode = l10nEnt.node + # Apples and oranges, error out. + if refNode.nodeName != l10nNode.nodeName: + yield ("error", 0, "Incompatible resource types", "android") + return + # Once we start parsing more resource types, make sure to add checks + # for them. + if refNode.nodeName != "string": + yield ("warning", 0, "Unsupported resource type", "android") + return + for report_tuple in self.check_string([refNode], l10nEnt): + yield report_tuple + + def check_string(self, refs, l10nEnt): + '''Check a single string literal against a list of references. + + There should be multiple nodes given for <plurals> or <string-array>. 
+ ''' + l10n = l10nEnt.node + if self.not_translatable(l10n, *refs): + yield ( + "error", + 0, + "strings must be translatable", + "android" + ) + return + if self.no_at_string(l10n): + yield ( + "error", + 0, + "strings must be translatable", + "android" + ) + return + if self.no_at_string(*refs): + yield ( + "warning", + 0, + "strings must be translatable", + "android" + ) + if self.non_simple_data(l10n): + yield ( + "error", + 0, + "Only plain text allowed, " + "or one CDATA surrounded by whitespace", + "android" + ) + return + for report_tuple in check_apostrophes(l10nEnt.val): + yield report_tuple + + params, errors = get_params(refs) + for error, pos in errors: + yield ( + "warning", + pos, + error, + "android" + ) + if params: + for report_tuple in check_params(params, l10nEnt.val): + yield report_tuple + + def not_translatable(self, *nodes): + return any( + node.hasAttribute("translatable") + and node.getAttribute("translatable") == "false" + for node in nodes + ) + + def no_at_string(self, *ref_nodes): + '''Android allows to reference other strings by using + @string/identifier + instead of the actual value. Those references don't belong into + a localizable file, warn on that. + ''' + return any( + textContent(node).startswith('@string/') + for node in ref_nodes + ) + + def non_simple_data(self, node): + '''Only allow single text nodes, or, a single CDATA node + surrounded by whitespace. + ''' + cdata = [ + child + for child in node.childNodes + if child.nodeType == minidom.Node.CDATA_SECTION_NODE + ] + if len(cdata) == 0: + if node.childNodes.length == 0: + # empty translation is OK + return False + if node.childNodes.length != 1: + return True + return node.childNodes[0].nodeType != minidom.Node.TEXT_NODE + if len(cdata) > 1: + return True + for child in node.childNodes: + if child == cdata[0]: + continue + if child.nodeType != minidom.Node.TEXT_NODE: + return True + if child.data.strip() != "": + return True + return False + + +silencer = re.compile(r'\\.|""') + + +def check_apostrophes(string): + '''Check Android logic for quotes and apostrophes. + + If you have an apostrophe (') in your string, you must either escape it + with a backslash (\') or enclose the string in double-quotes ("). + + Unescaped quotes are not visually shown on Android, but they're + also harmless, so we're not checking for quotes. We might do once we're + better at checking for inline XML, which is full of quotes. + Pairing quotes as in '""' is bad, though, so report errors for that. + Mostly, because it's hard to tell if a string is consider quoted or not + by Android in the end. + + https://developer.android.com/guide/topics/resources/string-resource#escaping_quotes + ''' + for m in re.finditer('""', string): + yield ( + "error", + m.start(), + "Double straight quotes not allowed", + "android" + ) + string = silencer.sub(" ", string) + + is_quoted = string.startswith('"') and string.endswith('"') + if not is_quoted: + # apostrophes need to be escaped + for m in re.finditer("'", string): + yield ( + "error", + m.start(), + "Apostrophe must be escaped", + "android" + ) + + +def get_params(refs): + '''Get printf parameters and internal errors. + + Returns a sparse map of positions to formatter, and a list + of errors. Errors covered so far are mismatching formatters. 
+ ''' + params = {} + errors = [] + next_implicit = 1 + for ref in refs: + if isinstance(ref, minidom.Node): + ref = textContent(ref) + for m in re.finditer(r'%(?P<order>[1-9]\$)?(?P<format>[sSd])', ref): + order = m.group('order') + if order: + order = int(order[0]) + else: + order = next_implicit + next_implicit += 1 + fmt = m.group('format') + if order not in params: + params[order] = fmt + else: + # check for consistency errors + if params[order] == fmt: + continue + msg = "Conflicting formatting, %{order}${f1} vs %{order}${f2}" + errors.append(( + msg.format(order=order, f1=fmt, f2=params[order]), + m.start() + )) + return params, errors + + +def check_params(params, string): + '''Compare the printf parameters in the given string to the reference + parameters. + + Also yields errors that are internal to the parameters inside string, + as found by `get_params`. + ''' + lparams, errors = get_params([string]) + for error, pos in errors: + yield ( + "error", + pos, + error, + "android" + ) + # Compare reference for each localized parameter. + # If there's no reference found, error, as an out-of-bounds + # parameter crashes. + # This assumes that all parameters are actually used in the reference, + # which should be OK. + # If there's a mismatch in the formatter, error. + for order in sorted(lparams): + if order not in params: + yield ( + "error", + 0, + "Formatter %{}${} not found in reference".format( + order, lparams[order] + ), + "android" + ) + elif params[order] != lparams[order]: + yield ( + "error", + 0, + "Mismatching formatter", + "android" + ) diff --git a/third_party/python/compare-locales/compare_locales/checks/base.py b/third_party/python/compare-locales/compare_locales/checks/base.py new file mode 100644 index 0000000000..3b04caa7a9 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/base.py @@ -0,0 +1,127 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +import re +import six + + +class EntityPos(int): + pass + + +mochibake = re.compile('\ufffd') + + +class Checker(object): + '''Abstract class to implement checks per file type. + ''' + pattern = None + # if a check uses all reference entities, set this to True + needs_reference = False + + @classmethod + def use(cls, file): + return cls.pattern.match(file.file) + + def __init__(self, extra_tests, locale=None): + self.extra_tests = extra_tests + self.locale = locale + self.reference = None + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + + By default, check for possible encoding errors. + ''' + for m in mochibake.finditer(l10nEnt.all): + yield ( + "warning", + EntityPos(m.start()), + "\ufffd in: {}".format(l10nEnt.key), + "encodings" + ) + + def set_reference(self, reference): + '''Set the reference entities. + Only do this if self.needs_reference is True. 
+ ''' + self.reference = reference + + +class CSSCheckMixin(object): + def maybe_style(self, ref_value, l10n_value): + ref_map, _ = self.parse_css_spec(ref_value) + if not ref_map: + return + l10n_map, errors = self.parse_css_spec(l10n_value) + for t in self.check_style(ref_map, l10n_map, errors): + yield t + + def check_style(self, ref_map, l10n_map, errors): + if not l10n_map: + yield ('error', 0, 'reference is a CSS spec', 'css') + return + if errors: + yield ('error', 0, 'reference is a CSS spec', 'css') + return + msgs = [] + for prop, unit in l10n_map.items(): + if prop not in ref_map: + msgs.insert(0, '%s only in l10n' % prop) + continue + else: + ref_unit = ref_map.pop(prop) + if unit != ref_unit: + msgs.append("units for %s don't match " + "(%s != %s)" % (prop, unit, ref_unit)) + for prop in six.iterkeys(ref_map): + msgs.insert(0, '%s only in reference' % prop) + if msgs: + yield ('warning', 0, ', '.join(msgs), 'css') + + def parse_css_spec(self, val): + if not hasattr(self, '_css_spec'): + self._css_spec = re.compile( + r'(?:' + r'(?P<prop>(?:min\-|max\-)?(?:width|height))' + r'[ \t\r\n]*:[ \t\r\n]*' + r'(?P<length>[0-9]+|[0-9]*\.[0-9]+)' + r'(?P<unit>ch|em|ex|rem|px|cm|mm|in|pc|pt)' + r')' + r'|\Z' + ) + self._css_sep = re.compile(r'[ \t\r\n]*(?P<semi>;)?[ \t\r\n]*$') + refMap = errors = None + end = 0 + for m in self._css_spec.finditer(val): + if end == 0 and m.start() == m.end(): + # no CSS spec found, just immediately end of string + return None, None + if m.start() > end: + split = self._css_sep.match(val, end, m.start()) + if split is None: + errors = errors or [] + errors.append({ + 'pos': end, + 'code': 'css-bad-content', + }) + elif end > 0 and split.group('semi') is None: + errors = errors or [] + errors.append({ + 'pos': end, + 'code': 'css-missing-semicolon', + }) + if m.group('prop'): + refMap = refMap or {} + refMap[m.group('prop')] = m.group('unit') + end = m.end() + return refMap, errors diff --git a/third_party/python/compare-locales/compare_locales/checks/dtd.py b/third_party/python/compare-locales/compare_locales/checks/dtd.py new file mode 100644 index 0000000000..37d3c7846d --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/dtd.py @@ -0,0 +1,246 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re +from xml import sax +import six + +from compare_locales.parser import DTDParser +from .base import Checker, CSSCheckMixin + + +class DTDChecker(Checker, CSSCheckMixin): + """Tests to run on DTD files. + + Uses xml.sax for the heavy lifting of xml parsing. + + The code tries to parse until it doesn't find any unresolved entities + anymore. If it finds one, it tries to grab the key, and adds an empty + <!ENTITY key ""> definition to the header. + + Also checks for some CSS and number heuristics in the values. 
+ """ + pattern = re.compile(r'.*\.dtd$') + needs_reference = True # to cast a wider net for known entity references + + eref = re.compile('&(%s);' % DTDParser.Name) + tmpl = b'''<!DOCTYPE elem [%s]> +<elem>%s</elem> +''' + xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot')) + + def __init__(self, extra_tests, locale=None): + super(DTDChecker, self).__init__(extra_tests, locale=locale) + self.processContent = False + if self.extra_tests is not None and 'android-dtd' in self.extra_tests: + self.processContent = True + self.__known_entities = None + + def known_entities(self, refValue): + if self.__known_entities is None and self.reference is not None: + self.__known_entities = set() + for ent in self.reference.values(): + self.__known_entities.update( + self.entities_for_value(ent.raw_val)) + return self.__known_entities if self.__known_entities is not None \ + else self.entities_for_value(refValue) + + def entities_for_value(self, value): + reflist = set(m.group(1) + for m in self.eref.finditer(value)) + reflist -= self.xmllist + return reflist + + # Setup for XML parser, with default and text-only content handler + class TextContent(sax.handler.ContentHandler): + textcontent = '' + + def characters(self, content): + self.textcontent += content + + defaulthandler = sax.handler.ContentHandler() + texthandler = TextContent() + + numPattern = r'([0-9]+|[0-9]*\.[0-9]+)' + num = re.compile('^%s$' % numPattern) + lengthPattern = '%s(em|px|ch|cm|in)' % numPattern + length = re.compile('^%s$' % lengthPattern) + + def check(self, refEnt, l10nEnt): + """Try to parse the refvalue inside a dummy element, and keep + track of entities that we need to define to make that work. + + Return a checker that offers just those entities. + """ + for encoding_trouble in super( + DTDChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + refValue, l10nValue = refEnt.raw_val, l10nEnt.raw_val + # find entities the refValue references, + # reusing markup from DTDParser. + reflist = self.known_entities(refValue) + inContext = self.entities_for_value(refValue) + entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist)) + parser = sax.make_parser() + parser.setFeature(sax.handler.feature_external_ges, False) + + parser.setContentHandler(self.defaulthandler) + try: + parser.parse( + six.BytesIO(self.tmpl % + (entities.encode('utf-8'), + refValue.encode('utf-8')))) + # also catch stray % + parser.parse( + six.BytesIO(self.tmpl % + ((refEnt.all + entities).encode('utf-8'), + b'&%s;' % refEnt.key.encode('utf-8')))) + except sax.SAXParseException as e: + e # noqa + yield ('warning', + (0, 0), + "can't parse en-US value", 'xmlparse') + + # find entities the l10nValue references, + # reusing markup from DTDParser. 
+ l10nlist = self.entities_for_value(l10nValue) + missing = sorted(l10nlist - reflist) + _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing) + if self.processContent: + self.texthandler.textcontent = '' + parser.setContentHandler(self.texthandler) + try: + parser.parse(six.BytesIO(self.tmpl % (_entities.encode('utf-8'), + l10nValue.encode('utf-8')))) + # also catch stray % + # if this fails, we need to substract the entity definition + parser.setContentHandler(self.defaulthandler) + parser.parse( + six.BytesIO(self.tmpl % + ((l10nEnt.all + _entities).encode('utf-8'), + b'&%s;' % l10nEnt.key.encode('utf-8')))) + except sax.SAXParseException as e: + # xml parse error, yield error + # sometimes, the error is reported on our fake closing + # element, make that the end of the last line + lnr = e.getLineNumber() - 1 + lines = l10nValue.splitlines() + if lnr > len(lines): + lnr = len(lines) + col = len(lines[lnr-1]) + else: + col = e.getColumnNumber() + if lnr == 1: + # first line starts with <elem>, substract + col -= len("<elem>") + elif lnr == 0: + col -= len("<!DOCTYPE elem [") # first line is DOCTYPE + yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse') + + warntmpl = u'Referencing unknown entity `%s`' + if reflist: + if inContext: + elsewhere = reflist - inContext + warntmpl += ' (%s used in context' % \ + ', '.join(sorted(inContext)) + if elsewhere: + warntmpl += ', %s known)' % ', '.join(sorted(elsewhere)) + else: + warntmpl += ')' + else: + warntmpl += ' (%s known)' % ', '.join(sorted(reflist)) + for key in missing: + yield ('warning', (0, 0), warntmpl % key, + 'xmlparse') + if inContext and l10nlist and l10nlist - inContext - set(missing): + mismatch = sorted(l10nlist - inContext - set(missing)) + for key in mismatch: + yield ('warning', (0, 0), + 'Entity %s referenced, but %s used in context' % ( + key, + ', '.join(sorted(inContext)) + ), 'xmlparse') + + # Number check + if self.num.match(refValue) and not self.num.match(l10nValue): + yield ('warning', 0, 'reference is a number', 'number') + # CSS checks + # just a length, width="100em" + if self.length.match(refValue) and not self.length.match(l10nValue): + yield ('error', 0, 'reference is a CSS length', 'css') + # Check for actual CSS style attribute values + for t in self.maybe_style(refValue, l10nValue): + yield t + + if self.extra_tests is not None and 'android-dtd' in self.extra_tests: + for t in self.processAndroidContent(self.texthandler.textcontent): + yield t + + quoted = re.compile("(?P<q>[\"']).*(?P=q)$") + + def unicode_escape(self, str): + """Helper method to try to decode all unicode escapes in a string. + + This code uses the standard python decode for unicode-escape, but + that's somewhat tricky, as its input needs to be ascii. To get to + ascii, the unicode string gets converted to ascii with + backslashreplace, i.e., all non-ascii unicode chars get unicode + escaped. And then we try to roll all of that back. + Now, when that hits an error, that's from the original string, and we + need to search for the actual error position in the original string, + as the backslashreplace code changes string positions quite badly. + See also the last check in TestAndroid.test_android_dtd, with a + lengthy chinese string. 
+ """ + val = str.encode('ascii', 'backslashreplace') + try: + val.decode('unicode-escape') + except UnicodeDecodeError as e: + args = list(e.args) + badstring = args[1][args[2]:args[3]] + i = len(args[1][:args[2]].decode('unicode-escape')) + args[2] = i + args[3] = i + len(badstring) + raise UnicodeDecodeError(*args) + + def processAndroidContent(self, val): + """Check for the string values that Android puts into an XML container. + + http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling # noqa + + Check for unicode escapes and unescaped quotes and apostrophes, + if string's not quoted. + """ + # first, try to decode unicode escapes + try: + self.unicode_escape(val) + except UnicodeDecodeError as e: + yield ('error', e.args[2], e.args[4], 'android') + # check for unescaped single or double quotes. + # first, see if the complete string is single or double quoted, + # that changes the rules + m = self.quoted.match(val) + if m: + q = m.group('q') + offset = 0 + val = val[1:-1] # strip quotes + else: + q = "[\"']" + offset = -1 + stray_quot = re.compile(r"[\\\\]*(%s)" % q) + + for m in stray_quot.finditer(val): + if len(m.group(0)) % 2: + # found an unescaped single or double quote, which message? + if m.group(1) == '"': + msg = "Quotes in Android DTDs need escaping with \\\" "\ + "or \\u0022, or put string in apostrophes." + else: + msg = "Apostrophes in Android DTDs need escaping with "\ + "\\' or \\u0027, or use \u2019, or put string in "\ + "quotes." + yield ('error', m.end(0)+offset, msg, 'android') diff --git a/third_party/python/compare-locales/compare_locales/checks/fluent.py b/third_party/python/compare-locales/compare_locales/checks/fluent.py new file mode 100644 index 0000000000..feb7242fb7 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/fluent.py @@ -0,0 +1,356 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re +from collections import defaultdict + +from fluent.syntax import ast as ftl +from fluent.syntax.serializer import serialize_variant_key +from fluent.syntax.visitor import Visitor + +from .base import Checker, CSSCheckMixin +from compare_locales import plurals + + +MSGS = { + 'missing-msg-ref': 'Missing message reference: {ref}', + 'missing-term-ref': 'Missing term reference: {ref}', + 'obsolete-msg-ref': 'Obsolete message reference: {ref}', + 'obsolete-term-ref': 'Obsolete term reference: {ref}', + 'duplicate-attribute': 'Attribute "{name}" is duplicated', + 'missing-value': 'Missing value', + 'obsolete-value': 'Obsolete value', + 'missing-attribute': 'Missing attribute: {name}', + 'obsolete-attribute': 'Obsolete attribute: {name}', + 'duplicate-variant': 'Variant key "{name}" is duplicated', + 'missing-plural': 'Plural categories missing: {categories}', + 'plain-message': '{message}', +} + + +def pattern_variants(pattern): + """Get variants of plain text of a pattern. + + For now, just return simple text patterns. + This can be improved to allow for SelectExpressions + of simple text patterns, or even nested expressions, and Literals. + Variants with Variable-, Message-, or TermReferences should be ignored. 
+ """ + elements = pattern.elements + if len(elements) == 1: + if isinstance(elements[0], ftl.TextElement): + return [elements[0].value] + return [] + + +class ReferenceMessageVisitor(Visitor, CSSCheckMixin): + def __init__(self): + # References to Messages, their Attributes, and Terms + # Store reference name and type + self.entry_refs = defaultdict(dict) + # The currently active references + self.refs = {} + # Start with the Entry value (associated with None) + self.entry_refs[None] = self.refs + # If we're a messsage, store if there was a value + self.message_has_value = False + # Map attribute names to positions + self.attribute_positions = {} + # Map of CSS style attribute properties and units + self.css_styles = None + self.css_errors = None + + def generic_visit(self, node): + if isinstance( + node, + (ftl.Span, ftl.Annotation, ftl.BaseComment) + ): + return + super(ReferenceMessageVisitor, self).generic_visit(node) + + def visit_Message(self, node): + if node.value is not None: + self.message_has_value = True + super(ReferenceMessageVisitor, self).generic_visit(node) + + def visit_Attribute(self, node): + self.attribute_positions[node.id.name] = node.span.start + old_refs = self.refs + self.refs = self.entry_refs[node.id.name] + super(ReferenceMessageVisitor, self).generic_visit(node) + self.refs = old_refs + if node.id.name != 'style': + return + text_values = pattern_variants(node.value) + if not text_values: + self.css_styles = 'skip' + return + # right now, there's just one possible text value + self.css_styles, self.css_errors = self.parse_css_spec(text_values[0]) + + def visit_SelectExpression(self, node): + # optimize select expressions to only go through the variants + self.visit(node.variants) + + def visit_MessageReference(self, node): + ref = node.id.name + if node.attribute: + ref += '.' 
+ node.attribute.name + self.refs[ref] = 'msg-ref' + + def visit_TermReference(self, node): + # only collect term references, but not attributes of terms + if node.attribute: + return + self.refs['-' + node.id.name] = 'term-ref' + + +class GenericL10nChecks(object): + '''Helper Mixin for checks shared between Terms and Messages.''' + def check_duplicate_attributes(self, node): + warned = set() + for left in range(len(node.attributes) - 1): + if left in warned: + continue + left_attr = node.attributes[left] + warned_left = False + for right in range(left+1, len(node.attributes)): + right_attr = node.attributes[right] + if left_attr.id.name == right_attr.id.name: + if not warned_left: + warned_left = True + self.messages.append( + ( + 'warning', left_attr.span.start, + MSGS['duplicate-attribute'].format( + name=left_attr.id.name + ) + ) + ) + warned.add(right) + self.messages.append( + ( + 'warning', right_attr.span.start, + MSGS['duplicate-attribute'].format( + name=left_attr.id.name + ) + ) + ) + + def check_variants(self, variants): + # Check for duplicate variants + warned = set() + for left in range(len(variants) - 1): + if left in warned: + continue + left_key = variants[left].key + key_string = None + for right in range(left+1, len(variants)): + if left_key.equals(variants[right].key): + if key_string is None: + key_string = serialize_variant_key(left_key) + self.messages.append( + ( + 'warning', left_key.span.start, + MSGS['duplicate-variant'].format( + name=key_string + ) + ) + ) + warned.add(right) + self.messages.append( + ( + 'warning', variants[right].key.span.start, + MSGS['duplicate-variant'].format( + name=key_string + ) + ) + ) + # Check for plural categories + known_plurals = plurals.get_plural(self.locale) + if known_plurals: + known_plurals = set(known_plurals) + # Ask for known plurals, but check for plurals w/out `other`. + # `other` is used for all kinds of things. 
+ check_plurals = known_plurals.copy() + check_plurals.discard('other') + given_plurals = set(serialize_variant_key(v.key) for v in variants) + if given_plurals & check_plurals: + missing_plurals = sorted(known_plurals - given_plurals) + if missing_plurals: + self.messages.append( + ( + 'warning', variants[0].key.span.start, + MSGS['missing-plural'].format( + categories=', '.join(missing_plurals) + ) + ) + ) + + +class L10nMessageVisitor(GenericL10nChecks, ReferenceMessageVisitor): + def __init__(self, locale, reference): + super(L10nMessageVisitor, self).__init__() + self.locale = locale + # Overload refs to map to sets, just store what we found + # References to Messages, their Attributes, and Terms + # Store reference name and type + self.entry_refs = defaultdict(set) + # The currently active references + self.refs = set() + # Start with the Entry value (associated with None) + self.entry_refs[None] = self.refs + self.reference = reference + self.reference_refs = reference.entry_refs[None] + self.messages = [] + + def visit_Message(self, node): + self.check_duplicate_attributes(node) + super(L10nMessageVisitor, self).visit_Message(node) + if self.message_has_value and not self.reference.message_has_value: + self.messages.append( + ('error', node.value.span.start, MSGS['obsolete-value']) + ) + if not self.message_has_value and self.reference.message_has_value: + self.messages.append( + ('error', 0, MSGS['missing-value']) + ) + ref_attrs = set(self.reference.attribute_positions) + l10n_attrs = set(self.attribute_positions) + for missing_attr in ref_attrs - l10n_attrs: + self.messages.append( + ( + 'error', 0, + MSGS['missing-attribute'].format(name=missing_attr) + ) + ) + for obs_attr in l10n_attrs - ref_attrs: + self.messages.append( + ( + 'error', self.attribute_positions[obs_attr], + MSGS['obsolete-attribute'].format(name=obs_attr) + ) + ) + + def visit_Term(self, node): + raise RuntimeError("Should not use L10nMessageVisitor for Terms") + + def visit_Attribute(self, node): + old_reference_refs = self.reference_refs + self.reference_refs = self.reference.entry_refs[node.id.name] + super(L10nMessageVisitor, self).visit_Attribute(node) + self.reference_refs = old_reference_refs + if node.id.name != 'style' or self.css_styles == 'skip': + return + ref_styles = self.reference.css_styles + if ref_styles in ('skip', None): + # Reference is complex, l10n isn't. + # Let's still validate the css spec. + ref_styles = {} + for cat, msg, pos, _ in self.check_style( + ref_styles, + self.css_styles, + self.css_errors + ): + self.messages.append((cat, msg, pos)) + + def visit_SelectExpression(self, node): + super(L10nMessageVisitor, self).visit_SelectExpression(node) + self.check_variants(node.variants) + + def visit_MessageReference(self, node): + ref = node.id.name + if node.attribute: + ref += '.' 
+ node.attribute.name + self.refs.add(ref) + self.check_obsolete_ref(node, ref, 'msg-ref') + + def visit_TermReference(self, node): + if node.attribute: + return + ref = '-' + node.id.name + self.refs.add(ref) + self.check_obsolete_ref(node, ref, 'term-ref') + + def check_obsolete_ref(self, node, ref, ref_type): + if ref not in self.reference_refs: + self.messages.append( + ( + 'warning', node.span.start, + MSGS['obsolete-' + ref_type].format(ref=ref), + ) + ) + + +class TermVisitor(GenericL10nChecks, Visitor): + def __init__(self, locale): + super(TermVisitor, self).__init__() + self.locale = locale + self.messages = [] + + def generic_visit(self, node): + if isinstance( + node, + (ftl.Span, ftl.Annotation, ftl.BaseComment) + ): + return + super(TermVisitor, self).generic_visit(node) + + def visit_Message(self, node): + raise RuntimeError("Should not use TermVisitor for Messages") + + def visit_Term(self, node): + self.check_duplicate_attributes(node) + super(TermVisitor, self).generic_visit(node) + + def visit_SelectExpression(self, node): + super(TermVisitor, self).generic_visit(node) + self.check_variants(node.variants) + + +class FluentChecker(Checker): + '''Tests to run on Fluent (FTL) files. + ''' + pattern = re.compile(r'.*\.ftl') + + def check_message(self, ref_entry, l10n_entry): + '''Run checks on localized messages against reference message.''' + ref_data = ReferenceMessageVisitor() + ref_data.visit(ref_entry) + l10n_data = L10nMessageVisitor(self.locale, ref_data) + l10n_data.visit(l10n_entry) + + messages = l10n_data.messages + for attr_or_val, refs in ref_data.entry_refs.items(): + for ref, ref_type in refs.items(): + if ref not in l10n_data.entry_refs[attr_or_val]: + msg = MSGS['missing-' + ref_type].format(ref=ref) + messages.append(('warning', 0, msg)) + return messages + + def check_term(self, l10n_entry): + '''Check localized terms.''' + l10n_data = TermVisitor(self.locale) + l10n_data.visit(l10n_entry) + return l10n_data.messages + + def check(self, refEnt, l10nEnt): + for encoding_trouble in super( + FluentChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + l10n_entry = l10nEnt.entry + if isinstance(l10n_entry, ftl.Message): + ref_entry = refEnt.entry + messages = self.check_message(ref_entry, l10n_entry) + elif isinstance(l10n_entry, ftl.Term): + messages = self.check_term(l10n_entry) + + messages.sort(key=lambda t: t[1]) + for cat, pos, msg in messages: + if pos: + pos = pos - l10n_entry.span.start + yield (cat, pos, msg, 'fluent') diff --git a/third_party/python/compare-locales/compare_locales/checks/properties.py b/third_party/python/compare-locales/compare_locales/checks/properties.py new file mode 100644 index 0000000000..9ff2e4cdae --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/checks/properties.py @@ -0,0 +1,173 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re +from difflib import SequenceMatcher +from six.moves import range +from six.moves import zip + +from compare_locales.parser import PropertiesEntity +from compare_locales import plurals +from .base import Checker + + +class PrintfException(Exception): + def __init__(self, msg, pos): + self.pos = pos + self.msg = msg + + +class PropertiesChecker(Checker): + '''Tests to run on .properties files. 
+ ''' + pattern = re.compile(r'.*\.properties$') + printf = re.compile(r'%(?P<good>%|' + r'(?:(?P<number>[1-9][0-9]*)\$)?' + r'(?P<width>\*|[0-9]+)?' + r'(?P<prec>\.(?:\*|[0-9]+)?)?' + r'(?P<spec>[duxXosScpfg]))?') + + def check(self, refEnt, l10nEnt): + '''Test for the different variable formats. + ''' + for encoding_trouble in super( + PropertiesChecker, self + ).check(refEnt, l10nEnt): + yield encoding_trouble + refValue, l10nValue = refEnt.val, l10nEnt.val + refSpecs = None + # check for PluralForm.jsm stuff, should have the docs in the + # comment + # That also includes intl.properties' pluralRule, so exclude + # entities with that key and values with just numbers + if (refEnt.pre_comment + and 'Localization_and_Plurals' in refEnt.pre_comment.all + and refEnt.key != 'pluralRule' + and not re.match(r'\d+$', refValue)): + for msg_tuple in self.check_plural(refValue, l10nValue): + yield msg_tuple + return + # check for lost escapes + raw_val = l10nEnt.raw_val + for m in PropertiesEntity.escape.finditer(raw_val): + if m.group('single') and \ + m.group('single') not in PropertiesEntity.known_escapes: + yield ('warning', m.start(), + 'unknown escape sequence, \\' + m.group('single'), + 'escape') + try: + refSpecs = self.getPrintfSpecs(refValue) + except PrintfException: + refSpecs = [] + if refSpecs: + for t in self.checkPrintf(refSpecs, l10nValue): + yield t + return + + def check_plural(self, refValue, l10nValue): + '''Check for the stringbundle plurals logic. + The common variable pattern is #1. + ''' + known_plurals = plurals.get_plural(self.locale) + if known_plurals: + expected_forms = len(known_plurals) + found_forms = l10nValue.count(';') + 1 + msg = 'expecting {} plurals, found {}'.format( + expected_forms, + found_forms + ) + if expected_forms > found_forms: + yield ('warning', 0, msg, 'plural') + if expected_forms < found_forms: + yield ('warning', 0, msg, 'plural') + pats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)', + refValue)) + if len(pats) == 0: + return + lpats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)', + l10nValue)) + if pats - lpats: + yield ('warning', 0, 'not all variables used in l10n', + 'plural') + return + if lpats - pats: + yield ('error', 0, 'unreplaced variables in l10n', + 'plural') + + def checkPrintf(self, refSpecs, l10nValue): + try: + l10nSpecs = self.getPrintfSpecs(l10nValue) + except PrintfException as e: + yield ('error', e.pos, e.msg, 'printf') + return + if refSpecs != l10nSpecs: + sm = SequenceMatcher() + sm.set_seqs(refSpecs, l10nSpecs) + msgs = [] + warn = None + for action, i1, i2, j1, j2 in sm.get_opcodes(): + if action == 'equal': + continue + if action == 'delete': + # missing argument in l10n + if i2 == len(refSpecs): + # trailing specs missing, that's just a warning + warn = ', '.join('trailing argument %d `%s` missing' % + (i+1, refSpecs[i]) + for i in range(i1, i2)) + else: + for i in range(i1, i2): + msgs.append('argument %d `%s` missing' % + (i+1, refSpecs[i])) + continue + if action == 'insert': + # obsolete argument in l10n + for i in range(j1, j2): + msgs.append('argument %d `%s` obsolete' % + (i+1, l10nSpecs[i])) + continue + if action == 'replace': + for i, j in zip(range(i1, i2), range(j1, j2)): + msgs.append('argument %d `%s` should be `%s`' % + (j+1, l10nSpecs[j], refSpecs[i])) + if msgs: + yield ('error', 0, ', '.join(msgs), 'printf') + if warn is not None: + yield ('warning', 0, warn, 'printf') + + def getPrintfSpecs(self, val): + hasNumber = False + specs = [] + for m in self.printf.finditer(val): + if 
m.group("good") is None: + # found just a '%', signal an error + raise PrintfException('Found single %', m.start()) + if m.group("good") == '%': + # escaped % + continue + if ((hasNumber and m.group('number') is None) or + (not hasNumber and specs and + m.group('number') is not None)): + # mixed style, numbered and not + raise PrintfException('Mixed ordered and non-ordered args', + m.start()) + hasNumber = m.group('number') is not None + if hasNumber: + pos = int(m.group('number')) - 1 + ls = len(specs) + if pos >= ls: + # pad specs + nones = pos - ls + specs[ls:pos] = nones*[None] + specs.append(m.group('spec')) + else: + specs[pos] = m.group('spec') + else: + specs.append(m.group('spec')) + # check for missing args + if hasNumber and not all(specs): + raise PrintfException('Ordered argument missing', 0) + return specs diff --git a/third_party/python/compare-locales/compare_locales/commands.py b/third_party/python/compare-locales/compare_locales/commands.py new file mode 100644 index 0000000000..c2a2f2fe01 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/commands.py @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Commands exposed to commandlines' + +from __future__ import absolute_import +from __future__ import print_function +import logging +from argparse import ArgumentParser +from json import dump as json_dump +import os +import sys + +from compare_locales import mozpath +from compare_locales import version +from compare_locales.paths import EnumerateApp, TOMLParser, ConfigNotFound +from compare_locales.compare import compareProjects + + +class CompareLocales(object): + """Check the localization status of gecko applications. +The first arguments are paths to the l10n.toml or ini files for the +applications, followed by the base directory of the localization repositories. +Then you pass in the list of locale codes you want to compare. If there are +no locales given, the list of locales will be taken from the l10n.toml file +or the all-locales file referenced by the application\'s l10n.ini.""" + + def __init__(self): + self.parser = self.get_parser() + + def get_parser(self): + """Get an ArgumentParser, with class docstring as description. + """ + parser = ArgumentParser(description=self.__doc__) + parser.add_argument('--version', action='version', + version='%(prog)s ' + version) + parser.add_argument('-v', '--verbose', action='count', + default=0, help='Make more noise') + parser.add_argument('-q', '--quiet', action='count', + default=0, help='''Show less data. +Specified once, don't show obsolete entities. Specified twice, also hide +missing entities. 
Specify thrice to exclude warnings and four times to +just show stats''') + parser.add_argument('--validate', action='store_true', + help='Run compare-locales against reference') + parser.add_argument('-m', '--merge', + help='''Use this directory to stage merged files, +use {ab_CD} to specify a different directory for each locale''') + parser.add_argument('config_paths', metavar='l10n.toml', nargs='+', + help='TOML or INI file for the project') + parser.add_argument('l10n_base_dir', metavar='l10n-base-dir', + help='Parent directory of localizations') + parser.add_argument('locales', nargs='*', metavar='locale-code', + help='Locale code and top-level directory of ' + 'each localization') + parser.add_argument('--json', + help='''Serialize to JSON. Value is the name of +the output file, pass "-" to serialize to stdout and hide the default output. +''') + parser.add_argument('-D', action='append', metavar='var=value', + default=[], dest='defines', + help='Overwrite variables in TOML files') + parser.add_argument('--full', action="store_true", + help="Compare sub-projects that are disabled") + parser.add_argument('--return-zero', action="store_true", + help="Return 0 regardless of l10n status") + parser.add_argument('--clobber-merge', action="store_true", + default=False, dest='clobber', + help="""WARNING: DATALOSS. +Use this option with care. If specified, the merge directory will +be clobbered for each module. That means, the subdirectory will +be completely removed, any files that were there are lost. +Be careful to specify the right merge directory when using this option.""") + return parser + + @classmethod + def call(cls): + """Entry_point for setuptools. + The actual command handling is done in the handle() method of the + subclasses. + """ + cmd = cls() + args = cmd.parser.parse_args() + return cmd.handle(**vars(args)) + + def handle( + self, + quiet=0, verbose=0, + validate=False, + merge=None, + config_paths=[], l10n_base_dir=None, locales=[], + defines=[], + full=False, + return_zero=False, + clobber=False, + json=None, + ): + """The instance part of the classmethod call. + + Using keyword arguments as that is what we need for mach + commands in mozilla-central. + """ + # log as verbose or quiet as we want, warn by default + logging_level = logging.WARNING - (verbose - quiet) * 10 + logging.basicConfig() + logging.getLogger().setLevel(logging_level) + + config_paths, l10n_base_dir, locales = self.extract_positionals( + validate=validate, + config_paths=config_paths, + l10n_base_dir=l10n_base_dir, + locales=locales, + ) + + # when we compare disabled projects, we set our locales + # on all subconfigs, so deep is True. 
+ locales_deep = full + configs = [] + config_env = { + 'l10n_base': l10n_base_dir + } + for define in defines: + var, _, value = define.partition('=') + config_env[var] = value + for config_path in config_paths: + if config_path.endswith('.toml'): + try: + config = TOMLParser().parse(config_path, env=config_env) + except ConfigNotFound as e: + self.parser.exit('config file %s not found' % e.filename) + if locales_deep: + if not locales: + # no explicit locales given, force all locales + config.set_locales(config.all_locales, deep=True) + else: + config.set_locales(locales, deep=True) + configs.append(config) + else: + app = EnumerateApp(config_path, l10n_base_dir) + configs.append(app.asConfig()) + try: + observers = compareProjects( + configs, + locales, + l10n_base_dir, + quiet=quiet, + merge_stage=merge, clobber_merge=clobber) + except (OSError, IOError) as exc: + print("FAIL: " + str(exc)) + self.parser.exit(2) + + if json is None or json != '-': + details = observers.serializeDetails() + if details: + print(details) + if len(configs) > 1: + if details: + print('') + print("Summaries for") + for config_path in config_paths: + print(" " + config_path) + print(" and the union of these, counting each string once") + print(observers.serializeSummaries()) + if json is not None: + data = [observer.toJSON() for observer in observers] + stdout = json == '-' + indent = 1 if stdout else None + fh = sys.stdout if stdout else open(json, 'w') + json_dump(data, fh, sort_keys=True, indent=indent) + if stdout: + fh.write('\n') + fh.close() + rv = 1 if not return_zero and observers.error else 0 + return rv + + def extract_positionals( + self, + validate=False, + config_paths=[], l10n_base_dir=None, locales=[], + ): + # using nargs multiple times in argparser totally screws things + # up, repair that. + # First files are configs, then the base dir, everything else is + # locales + all_args = config_paths + [l10n_base_dir] + locales + config_paths = [] + # The first directory is our l10n base, split there. + while all_args and not os.path.isdir(all_args[0]): + config_paths.append(all_args.pop(0)) + if not config_paths: + self.parser.error('no configuration file given') + for cf in config_paths: + if not os.path.isfile(cf): + self.parser.error('config file %s not found' % cf) + if not all_args: + self.parser.error('l10n-base-dir not found') + l10n_base_dir = mozpath.abspath(all_args.pop(0)) + if validate: + # signal validation mode by setting locale list to [None] + locales = [None] + else: + locales = all_args + + return config_paths, l10n_base_dir, locales diff --git a/third_party/python/compare-locales/compare_locales/compare/__init__.py b/third_party/python/compare-locales/compare_locales/compare/__init__.py new file mode 100644 index 0000000000..434dab9553 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/compare/__init__.py @@ -0,0 +1,91 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +'Mozilla l10n compare locales tool' + +from __future__ import absolute_import +from __future__ import print_function +import os +import shutil + +from compare_locales import paths, mozpath + +from .content import ContentComparer +from .observer import Observer, ObserverList +from .utils import Tree, AddRemove + + +__all__ = [ + 'ContentComparer', + 'Observer', 'ObserverList', + 'AddRemove', 'Tree', + 'compareProjects', +] + + +def compareProjects( + project_configs, + locales, + l10n_base_dir, + stat_observer=None, + merge_stage=None, + clobber_merge=False, + quiet=0, + ): + all_locales = set(locales) + comparer = ContentComparer(quiet) + observers = comparer.observers + for project in project_configs: + # disable filter if we're in validation mode + if None in locales: + filter = None + else: + filter = project.filter + observers.append( + Observer( + quiet=quiet, + filter=filter, + )) + if not locales: + all_locales.update(project.all_locales) + for locale in sorted(all_locales): + files = paths.ProjectFiles(locale, project_configs, + mergebase=merge_stage) + if merge_stage is not None: + if clobber_merge: + mergematchers = set(_m.get('merge') for _m in files.matchers) + mergematchers.discard(None) + for matcher in mergematchers: + clobberdir = matcher.prefix + if os.path.exists(clobberdir): + shutil.rmtree(clobberdir) + print("clobbered " + clobberdir) + for l10npath, refpath, mergepath, extra_tests in files: + # module and file path are needed for legacy filter.py support + module = None + fpath = mozpath.relpath(l10npath, l10n_base_dir) + for _m in files.matchers: + if _m['l10n'].match(l10npath): + if _m['module']: + # legacy ini support, set module, and resolve + # local path against the matcher prefix, + # which includes the module + module = _m['module'] + fpath = mozpath.relpath(l10npath, _m['l10n'].prefix) + break + reffile = paths.File(refpath, fpath or refpath, module=module) + if locale is None: + # When validating the reference files, set locale + # to a private subtag. This only shows in the output. + locale = paths.REFERENCE_LOCALE + l10n = paths.File(l10npath, fpath or l10npath, + module=module, locale=locale) + if not os.path.exists(l10npath): + comparer.add(reffile, l10n, mergepath) + continue + if not os.path.exists(refpath): + comparer.remove(reffile, l10n, mergepath) + continue + comparer.compare(reffile, l10n, mergepath, extra_tests) + return observers diff --git a/third_party/python/compare-locales/compare_locales/compare/content.py b/third_party/python/compare-locales/compare_locales/compare/content.py new file mode 100644 index 0000000000..03ba222d8e --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/compare/content.py @@ -0,0 +1,307 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Mozilla l10n compare locales tool' + +from __future__ import absolute_import +from __future__ import print_function +import codecs +import os +import shutil +import re + +from compare_locales import parser +from compare_locales import mozpath +from compare_locales.checks import getChecker, EntityPos +from compare_locales.keyedtuple import KeyedTuple + +from .observer import ObserverList +from .utils import AddRemove + + +class ContentComparer: + keyRE = re.compile('[kK]ey') + nl = re.compile('\n', re.M) + + def __init__(self, quiet=0): + '''Create a ContentComparer. 
+ observer is usually a instance of Observer. The return values + of the notify method are used to control the handling of missing + entities. + ''' + self.observers = ObserverList(quiet=quiet) + + def create_merge_dir(self, merge_file): + outdir = mozpath.dirname(merge_file) + if not os.path.isdir(outdir): + os.makedirs(outdir) + + def merge(self, ref_entities, ref_file, l10n_file, merge_file, + missing, skips, ctx, capabilities, encoding): + '''Create localized file in merge dir + + `ref_entities` and `ref_map` are the parser result of the + reference file + `ref_file` and `l10n_file` are the File objects for the reference and + the l10n file, resp. + `merge_file` is the output path for the generated content. This is None + if we're just comparing or validating. + `missing` are the missing messages in l10n - potentially copied from + reference + `skips` are entries to be dropped from the localized file + `ctx` is the parsing context + `capabilities` are the capabilities for the merge algorithm + `encoding` is the encoding to be used when serializing, usually utf-8 + ''' + + if not merge_file: + return + + if capabilities == parser.CAN_NONE: + return + + self.create_merge_dir(merge_file) + + if capabilities & parser.CAN_COPY: + # copy the l10n file if it's good, or the reference file if not + if skips or missing: + src = ref_file.fullpath + else: + src = l10n_file.fullpath + shutil.copyfile(src, merge_file) + print("copied reference to " + merge_file) + return + + if not (capabilities & parser.CAN_SKIP): + return + + # Start with None in case the merge file doesn't need to be created. + f = None + + if skips: + # skips come in ordered by key name, we need them in file order + skips.sort(key=lambda s: s.span[0]) + + # we need to skip a few erroneous blocks in the input, copy by hand + f = codecs.open(merge_file, 'wb', encoding) + offset = 0 + for skip in skips: + chunk = skip.span + f.write(ctx.contents[offset:chunk[0]]) + offset = chunk[1] + f.write(ctx.contents[offset:]) + + if f is None: + # l10n file is a good starting point + shutil.copyfile(l10n_file.fullpath, merge_file) + + if not (capabilities & parser.CAN_MERGE): + if f: + f.close() + return + + if skips or missing: + if f is None: + f = codecs.open(merge_file, 'ab', encoding) + trailing = (['\n'] + + [ref_entities[key].all for key in missing] + + [ref_entities[skip.key].all for skip in skips + if not isinstance(skip, parser.Junk)]) + + def ensureNewline(s): + if not s.endswith('\n'): + return s + '\n' + return s + + print("adding to " + merge_file) + f.write(''.join(map(ensureNewline, trailing))) + + if f is not None: + f.close() + + def remove(self, ref_file, l10n, merge_file): + '''Obsolete l10n file. + + Copy to merge stage if we can. + ''' + self.observers.notify('obsoleteFile', l10n, None) + self.merge( + KeyedTuple([]), ref_file, l10n, merge_file, + [], [], None, parser.CAN_COPY, None + ) + + def compare(self, ref_file, l10n, merge_file, extra_tests=None): + try: + p = parser.getParser(ref_file.file) + except UserWarning: + # no comparison, XXX report? 
+ # At least, merge + self.merge( + KeyedTuple([]), ref_file, l10n, merge_file, [], [], None, + parser.CAN_COPY, None) + return + try: + p.readFile(ref_file) + except Exception as e: + self.observers.notify('error', ref_file, str(e)) + return + ref_entities = p.parse() + try: + p.readFile(l10n) + l10n_entities = p.parse() + l10n_ctx = p.ctx + except Exception as e: + self.observers.notify('error', l10n, str(e)) + return + + ar = AddRemove() + ar.set_left(ref_entities.keys()) + ar.set_right(l10n_entities.keys()) + report = missing = obsolete = changed = unchanged = keys = 0 + missing_w = changed_w = unchanged_w = 0 # word stats + missings = [] + skips = [] + checker = getChecker(l10n, extra_tests=extra_tests) + if checker and checker.needs_reference: + checker.set_reference(ref_entities) + for msg in p.findDuplicates(ref_entities): + self.observers.notify('warning', l10n, msg) + for msg in p.findDuplicates(l10n_entities): + self.observers.notify('error', l10n, msg) + for action, entity_id in ar: + if action == 'delete': + # missing entity + if isinstance(ref_entities[entity_id], parser.Junk): + self.observers.notify( + 'warning', l10n, 'Parser error in en-US' + ) + continue + _rv = self.observers.notify('missingEntity', l10n, entity_id) + if _rv == "ignore": + continue + if _rv == "error": + # only add to missing entities for l10n-merge on error, + # not report + missings.append(entity_id) + missing += 1 + refent = ref_entities[entity_id] + missing_w += refent.count_words() + else: + # just report + report += 1 + elif action == 'add': + # obsolete entity or junk + if isinstance(l10n_entities[entity_id], + parser.Junk): + junk = l10n_entities[entity_id] + self.observers.notify( + 'error', l10n, + junk.error_message() + ) + if merge_file is not None: + skips.append(junk) + elif ( + self.observers.notify('obsoleteEntity', l10n, entity_id) + != 'ignore' + ): + obsolete += 1 + else: + # entity found in both ref and l10n, check for changed + refent = ref_entities[entity_id] + l10nent = l10n_entities[entity_id] + if self.keyRE.search(entity_id): + keys += 1 + else: + if refent.equals(l10nent): + self.doUnchanged(l10nent) + unchanged += 1 + unchanged_w += refent.count_words() + else: + self.doChanged(ref_file, refent, l10nent) + changed += 1 + changed_w += refent.count_words() + # run checks: + if checker: + for tp, pos, msg, cat in checker.check(refent, l10nent): + if isinstance(pos, EntityPos): + line, col = l10nent.position(pos) + else: + line, col = l10nent.value_position(pos) + # skip error entities when merging + if tp == 'error' and merge_file is not None: + skips.append(l10nent) + self.observers.notify( + tp, l10n, + u"%s at line %d, column %d for %s" % + (msg, line, col, refent.key) + ) + pass + + if merge_file is not None: + self.merge( + ref_entities, ref_file, + l10n, merge_file, missings, skips, l10n_ctx, + p.capabilities, p.encoding) + + stats = { + 'missing': missing, + 'missing_w': missing_w, + 'report': report, + 'obsolete': obsolete, + 'changed': changed, + 'changed_w': changed_w, + 'unchanged': unchanged, + 'unchanged_w': unchanged_w, + 'keys': keys, + } + self.observers.updateStats(l10n, stats) + pass + + def add(self, orig, missing, merge_file): + ''' Add missing localized file.''' + f = orig + try: + p = parser.getParser(f.file) + except UserWarning: + p = None + + # if we don't support this file, assume CAN_COPY to mimick + # l10n dir as closely as possible + caps = p.capabilities if p else parser.CAN_COPY + if (caps & (parser.CAN_COPY | parser.CAN_MERGE)): + # even if we can 
merge, pretend we can only copy + self.merge( + KeyedTuple([]), orig, missing, merge_file, + ['trigger copy'], [], None, parser.CAN_COPY, None + ) + + if self.observers.notify('missingFile', missing, None) == "ignore": + # filter said that we don't need this file, don't count it + return + + if p is None: + # We don't have a parser, cannot count missing strings + return + + try: + p.readFile(f) + entities = p.parse() + except Exception as ex: + self.observers.notify('error', f, str(ex)) + return + # strip parse errors + entities = [e for e in entities if not isinstance(e, parser.Junk)] + self.observers.updateStats(missing, {'missing': len(entities)}) + missing_w = 0 + for e in entities: + missing_w += e.count_words() + self.observers.updateStats(missing, {'missing_w': missing_w}) + + def doUnchanged(self, entity): + # overload this if needed + pass + + def doChanged(self, file, ref_entity, l10n_entity): + # overload this if needed + pass diff --git a/third_party/python/compare-locales/compare_locales/compare/observer.py b/third_party/python/compare-locales/compare_locales/compare/observer.py new file mode 100644 index 0000000000..7301d9a356 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/compare/observer.py @@ -0,0 +1,218 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Mozilla l10n compare locales tool' + +from __future__ import absolute_import +from __future__ import print_function +from collections import defaultdict +import six + +from .utils import Tree + + +class Observer(object): + + def __init__(self, quiet=0, filter=None): + '''Create Observer + For quiet=1, skip per-entity missing and obsolete strings, + for quiet=2, skip missing and obsolete files. For quiet=3, + skip warnings and errors. + ''' + self.summary = defaultdict(lambda: { + "errors": 0, + "warnings": 0, + "missing": 0, + "missing_w": 0, + "report": 0, + "obsolete": 0, + "changed": 0, + "changed_w": 0, + "unchanged": 0, + "unchanged_w": 0, + "keys": 0, + }) + self.details = Tree(list) + self.quiet = quiet + self.filter = filter + self.error = False + + def _dictify(self, d): + plaindict = {} + for k, v in six.iteritems(d): + plaindict[k] = dict(v) + return plaindict + + def toJSON(self): + # Don't export file stats, even if we collected them. + # Those are not part of the data we use toJSON for. + return { + 'summary': self._dictify(self.summary), + 'details': self.details.toJSON() + } + + def updateStats(self, file, stats): + # in multi-project scenarios, this file might not be ours, + # check that. + # Pass in a dummy entity key '' to avoid getting in to + # generic file filters. 
If we have stats for those, + # we want to aggregate the counts + if (self.filter is not None and + self.filter(file, entity='') == 'ignore'): + return + for category, value in six.iteritems(stats): + if category == 'errors': + # updateStats isn't called with `errors`, but make sure + # we handle this if that changes + self.error = True + self.summary[file.locale][category] += value + + def notify(self, category, file, data): + rv = 'error' + if category in ['missingFile', 'obsoleteFile']: + if self.filter is not None: + rv = self.filter(file) + if rv == "ignore" or self.quiet >= 2: + return rv + if self.quiet == 0 or category == 'missingFile': + self.details[file].append({category: rv}) + return rv + if self.filter is not None: + rv = self.filter(file, data) + if rv == "ignore": + return rv + if category in ['missingEntity', 'obsoleteEntity']: + if ( + (category == 'missingEntity' and self.quiet < 2) + or (category == 'obsoleteEntity' and self.quiet < 1) + ): + self.details[file].append({category: data}) + return rv + if category == 'error': + # Set error independently of quiet + self.error = True + if category in ('error', 'warning'): + if ( + (category == 'error' and self.quiet < 4) + or (category == 'warning' and self.quiet < 3) + ): + self.details[file].append({category: data}) + self.summary[file.locale][category + 's'] += 1 + return rv + + +class ObserverList(Observer): + def __init__(self, quiet=0): + super(ObserverList, self).__init__(quiet=quiet) + self.observers = [] + + def __iter__(self): + return iter(self.observers) + + def append(self, observer): + self.observers.append(observer) + + def notify(self, category, file, data): + """Check observer for the found data, and if it's + not to ignore, notify stat_observers. + """ + rvs = set( + observer.notify(category, file, data) + for observer in self.observers + ) + if all(rv == 'ignore' for rv in rvs): + return 'ignore' + # our return value doesn't count + super(ObserverList, self).notify(category, file, data) + rvs.discard('ignore') + if 'error' in rvs: + return 'error' + assert len(rvs) == 1 + return rvs.pop() + + def updateStats(self, file, stats): + """Check observer for the found data, and if it's + not to ignore, notify stat_observers. 
+ """ + for observer in self.observers: + observer.updateStats(file, stats) + super(ObserverList, self).updateStats(file, stats) + + def serializeDetails(self): + + def tostr(t): + if t[1] == 'key': + return ' ' * t[0] + '/'.join(t[2]) + o = [] + indent = ' ' * (t[0] + 1) + for item in t[2]: + if 'error' in item: + o += [indent + 'ERROR: ' + item['error']] + elif 'warning' in item: + o += [indent + 'WARNING: ' + item['warning']] + elif 'missingEntity' in item: + o += [indent + '+' + item['missingEntity']] + elif 'obsoleteEntity' in item: + o += [indent + '-' + item['obsoleteEntity']] + elif 'missingFile' in item: + o.append(indent + '// add and localize this file') + elif 'obsoleteFile' in item: + o.append(indent + '// remove this file') + return '\n'.join(o) + + return '\n'.join(tostr(c) for c in self.details.getContent()) + + def serializeSummaries(self): + summaries = { + loc: [] + for loc in self.summary.keys() + } + for observer in self.observers: + for loc, lst in summaries.items(): + # Not all locales are on all projects, + # default to empty summary + lst.append(observer.summary.get(loc, {})) + if len(self.observers) > 1: + # add ourselves if there's more than one project + for loc, lst in summaries.items(): + lst.append(self.summary[loc]) + keys = ( + 'errors', + 'warnings', + 'missing', 'missing_w', + 'obsolete', + 'changed', 'changed_w', + 'unchanged', 'unchanged_w', + 'keys', + ) + leads = [ + '{:12}'.format(k) for k in keys + ] + out = [] + for locale, summaries in sorted(six.iteritems(summaries)): + if locale: + out.append(locale + ':') + segment = [''] * len(keys) + for summary in summaries: + for row, key in enumerate(keys): + segment[row] += ' {:6}'.format(summary.get(key) or '') + + out += [ + lead + row + for lead, row in zip(leads, segment) + if row.strip() + ] + + total = sum([summaries[-1].get(k, 0) + for k in ['changed', 'unchanged', 'report', 'missing'] + ]) + rate = 0 + if total: + rate = (('changed' in summary and summary['changed'] * 100) or + 0) / total + out.append('%d%% of entries changed' % rate) + return '\n'.join(out) + + def __str__(self): + return 'observer' diff --git a/third_party/python/compare-locales/compare_locales/compare/utils.py b/third_party/python/compare-locales/compare_locales/compare/utils.py new file mode 100644 index 0000000000..5d79b5c47d --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/compare/utils.py @@ -0,0 +1,140 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +'Mozilla l10n compare locales tool' + +from __future__ import absolute_import +from __future__ import print_function + +import six +from six.moves import zip + +from compare_locales import paths + + +class Tree(object): + def __init__(self, valuetype): + self.branches = dict() + self.valuetype = valuetype + self.value = None + + def __getitem__(self, leaf): + parts = [] + if isinstance(leaf, paths.File): + parts = [] + if leaf.module: + parts += [leaf.locale] + leaf.module.split('/') + parts += leaf.file.split('/') + else: + parts = leaf.split('/') + return self.__get(parts) + + def __get(self, parts): + common = None + old = None + new = tuple(parts) + t = self + for k, v in six.iteritems(self.branches): + for i, part in enumerate(zip(k, parts)): + if part[0] != part[1]: + i -= 1 + break + if i < 0: + continue + i += 1 + common = tuple(k[:i]) + old = tuple(k[i:]) + new = tuple(parts[i:]) + break + if old: + self.branches.pop(k) + t = Tree(self.valuetype) + t.branches[old] = v + self.branches[common] = t + elif common: + t = self.branches[common] + if new: + if common: + return t.__get(new) + t2 = t + t = Tree(self.valuetype) + t2.branches[new] = t + if t.value is None: + t.value = t.valuetype() + return t.value + + indent = ' ' + + def getContent(self, depth=0): + ''' + Returns iterator of (depth, flag, key_or_value) tuples. + If flag is 'value', key_or_value is a value object, otherwise + (flag is 'key') it's a key string. + ''' + keys = sorted(self.branches.keys()) + if self.value is not None: + yield (depth, 'value', self.value) + for key in keys: + yield (depth, 'key', key) + for child in self.branches[key].getContent(depth + 1): + yield child + + def toJSON(self): + ''' + Returns this Tree as a JSON-able tree of hashes. + Only the values need to take care that they're JSON-able. 
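A quick hedged sketch (not part of the vendored files) of how the `Tree` container above behaves; the paths and payloads are invented for illustration.

```python
# Hypothetical usage of compare_locales.compare.utils.Tree.
from compare_locales.compare.utils import Tree

details = Tree(list)
# Indexing with a slash-separated path returns the valuetype instance
# (a list here), sharing branch prefixes between paths as it goes.
details['browser/locales/menu.ftl'].append({'missingEntity': 'menu-file'})
details['browser/locales/about.ftl'].append({'error': 'parse error'})

# getContent() walks the collapsed tree as (depth, 'key'|'value', data)
# tuples, which is what the observers use to serialize per-file details.
for depth, flag, data in details.getContent():
    print('  ' * depth, flag, data)
```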
+ ''' + if self.value is not None: + return self.value + return dict(('/'.join(key), self.branches[key].toJSON()) + for key in self.branches.keys()) + + def getStrRows(self): + def tostr(t): + if t[1] == 'key': + return self.indent * t[0] + '/'.join(t[2]) + return self.indent * (t[0] + 1) + str(t[2]) + + return [tostr(c) for c in self.getContent()] + + def __str__(self): + return '\n'.join(self.getStrRows()) + + +class AddRemove(object): + def __init__(self): + self.left = self.right = None + + def set_left(self, left): + if not isinstance(left, list): + left = list(l for l in left) + self.left = left + + def set_right(self, right): + if not isinstance(right, list): + right = list(l for l in right) + self.right = right + + def __iter__(self): + # order_map stores index in left and then index in right + order_map = dict((item, (i, -1)) for i, item in enumerate(self.left)) + left_items = set(order_map) + # as we go through the right side, keep track of which left + # item we had in right last, and for items not in left, + # set the sortmap to (left_offset, right_index) + left_offset = -1 + right_items = set() + for i, item in enumerate(self.right): + right_items.add(item) + if item in order_map: + left_offset = order_map[item][0] + else: + order_map[item] = (left_offset, i) + for item in sorted(order_map, key=lambda item: order_map[item]): + if item in left_items and item in right_items: + yield ('equal', item) + elif item in left_items: + yield ('delete', item) + else: + yield ('add', item) diff --git a/third_party/python/compare-locales/compare_locales/integration_tests/__init__.py b/third_party/python/compare-locales/compare_locales/integration_tests/__init__.py new file mode 100644 index 0000000000..ba9db8b8ec --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/integration_tests/__init__.py @@ -0,0 +1,5 @@ +'''Tests that are not run by default. + +They might just take long, or depend on external services, or both. +They might also fail for external changes. +''' diff --git a/third_party/python/compare-locales/compare_locales/integration_tests/test_plurals.py b/third_party/python/compare-locales/compare_locales/integration_tests/test_plurals.py new file mode 100644 index 0000000000..b36c41222b --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/integration_tests/test_plurals.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import json +import unittest +from six.moves.urllib.error import URLError +from six.moves.urllib.request import urlopen + +from compare_locales import plurals + + +TRANSVISION_URL = ( + 'https://transvision.mozfr.org/' + 'api/v1/entity/gecko_strings/' + '?id=toolkit/chrome/global/intl.properties:pluralRule' +) + + +class TestPlural(unittest.TestCase): + '''Integration test for plural forms and l10n-central. + + Having more plural forms than in l10n-central is OK, missing or + mismatching ones isn't. + Depends on Transvision. 
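To make the ordering behaviour of `AddRemove` above concrete, a small hypothetical run with invented keys:

```python
# AddRemove yields an ordered merge of two key sequences, tagging each key
# as 'equal', 'delete' (left only) or 'add' (right only).
from compare_locales.compare.utils import AddRemove

ar = AddRemove()
ar.set_left(['title', 'subtitle', 'ok'])    # e.g. reference entity keys
ar.set_right(['title', 'ok', 'cancel'])     # e.g. localized entity keys
print(list(ar))
# [('equal', 'title'), ('delete', 'subtitle'), ('equal', 'ok'), ('add', 'cancel')]
```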
+ ''' + maxDiff = None + + def test_valid_forms(self): + reference_form_map = self._load_transvision() + # Strip matches from dicts, to make diff for test small + locales = list(reference_form_map) + cl_form_map = {} + for locale in locales: + cl_form = str(plurals.get_plural_rule(locale)) + if cl_form == reference_form_map[locale]: + reference_form_map.pop(locale) + else: + cl_form_map[locale] = cl_form + self.assertDictEqual(reference_form_map, cl_form_map) + + def _load_transvision(self): + '''Use the Transvision API to load all values of pluralRule + in intl.properties. + Skip test on load failure. + ''' + try: + data = urlopen(TRANSVISION_URL).read() + except URLError: + raise unittest.SkipTest("Couldn't load Transvision API.") + return json.loads(data) diff --git a/third_party/python/compare-locales/compare_locales/keyedtuple.py b/third_party/python/compare-locales/compare_locales/keyedtuple.py new file mode 100644 index 0000000000..c232cc63c7 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/keyedtuple.py @@ -0,0 +1,58 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''A tuple with keys. + +A Sequence type that allows to refer to its elements by key. +Making this immutable, 'cause keeping track of mutations is hard. + +compare-locales uses strings for Entity keys, and tuples in the +case of PO. Support both. + +In the interfaces that check for membership, dicts check keys and +sequences check values. Always try our dict cache `__map` first, +and fall back to the superclass implementation. +''' + +from __future__ import absolute_import +from __future__ import unicode_literals + + +class KeyedTuple(tuple): + + def __new__(cls, iterable): + return super(KeyedTuple, cls).__new__(cls, iterable) + + def __init__(self, iterable): + self.__map = {} + if iterable: + for index, item in enumerate(self): + self.__map[item.key] = index + + def __contains__(self, key): + try: + contains = key in self.__map + if contains: + return True + except TypeError: + pass + return super(KeyedTuple, self).__contains__(key) + + def __getitem__(self, key): + try: + key = self.__map[key] + except (KeyError, TypeError): + pass + return super(KeyedTuple, self).__getitem__(key) + + def keys(self): + for value in self: + yield value.key + + def items(self): + for value in self: + yield value.key, value + + def values(self): + return self diff --git a/third_party/python/compare-locales/compare_locales/lint/__init__.py b/third_party/python/compare-locales/compare_locales/lint/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/lint/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/lint/cli.py b/third_party/python/compare-locales/compare_locales/lint/cli.py new file mode 100644 index 0000000000..35c026ee22 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/lint/cli.py @@ -0,0 +1,95 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
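A hedged sketch of the `KeyedTuple` behaviour described above; a namedtuple stands in for the parser entities, which only need a `.key` attribute here.

```python
from collections import namedtuple

from compare_locales.keyedtuple import KeyedTuple

Item = namedtuple('Item', ['key', 'val'])   # hypothetical Entity stand-in
entities = KeyedTuple([Item('title', 'Firefox'), Item('ok', 'OK')])

'title' in entities      # True: membership also checks keys
entities['ok'].val       # 'OK': lookup by key...
entities[0].key          # 'title': ...or by index, like a plain tuple
list(entities.keys())    # ['title', 'ok']
```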
+from __future__ import absolute_import +from __future__ import unicode_literals + +import argparse +import os + +from compare_locales.lint.linter import L10nLinter +from compare_locales.lint.util import ( + default_reference_and_tests, + mirror_reference_and_tests, + l10n_base_reference_and_tests, +) +from compare_locales import mozpath +from compare_locales import paths +from compare_locales import parser +from compare_locales import version + + +epilog = '''\ +moz-l10n-lint checks for common mistakes in localizable files. It tests for +duplicate entries, parsing errors, and the like. Optionally, it can compare +the strings to an external reference with strings and warn if a string might +need to get a new ID. +''' + + +def main(): + p = argparse.ArgumentParser( + description='Validate localizable strings', + epilog=epilog, + ) + p.add_argument('l10n_toml') + p.add_argument( + '--version', action='version', version='%(prog)s ' + version + ) + p.add_argument('-W', action='store_true', help='error on warnings') + p.add_argument( + '--l10n-reference', + dest='l10n_reference', + metavar='PATH', + help='check for conflicts against an l10n-only reference repository ' + 'like gecko-strings', + ) + p.add_argument( + '--reference-project', + dest='ref_project', + metavar='PATH', + help='check for conflicts against a reference project like ' + 'android-l10n', + ) + args = p.parse_args() + if args.l10n_reference: + l10n_base, locale = \ + os.path.split(os.path.abspath(args.l10n_reference)) + if not locale or not os.path.isdir(args.l10n_reference): + p.error('Pass an existing l10n reference') + else: + l10n_base = '.' + locale = None + pc = paths.TOMLParser().parse(args.l10n_toml, env={'l10n_base': l10n_base}) + if locale: + pc.set_locales([locale], deep=True) + files = paths.ProjectFiles(locale, [pc]) + get_reference_and_tests = default_reference_and_tests + if args.l10n_reference: + get_reference_and_tests = l10n_base_reference_and_tests(files) + elif args.ref_project: + get_reference_and_tests = mirror_reference_and_tests( + files, args.ref_project + ) + linter = L10nLinter() + results = linter.lint( + (f for f, _, _, _ in files.iter_reference() if parser.hasParser(f)), + get_reference_and_tests + ) + rv = 0 + if results: + rv = 1 + if all(r['level'] == 'warning' for r in results) and not args.W: + rv = 0 + for result in results: + print('{} ({}:{}): {}'.format( + mozpath.relpath(result['path'], '.'), + result.get('lineno', 0), + result.get('column', 0), + result['message'] + )) + return rv + + +if __name__ == '__main__': + main() diff --git a/third_party/python/compare-locales/compare_locales/lint/linter.py b/third_party/python/compare-locales/compare_locales/lint/linter.py new file mode 100644 index 0000000000..682d6e2ccf --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/lint/linter.py @@ -0,0 +1,123 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+from __future__ import absolute_import +from __future__ import unicode_literals + +from collections import Counter +import os + +from compare_locales import parser, checks +from compare_locales.paths import File, REFERENCE_LOCALE + + +class L10nLinter(object): + + def lint(self, files, get_reference_and_tests): + results = [] + for path in files: + if not parser.hasParser(path): + continue + ref, extra_tests = get_reference_and_tests(path) + results.extend(self.lint_file(path, ref, extra_tests)) + return results + + def lint_file(self, path, ref, extra_tests): + file_parser = parser.getParser(path) + if ref is not None and os.path.isfile(ref): + file_parser.readFile(ref) + reference = file_parser.parse() + else: + reference = {} + file_parser.readFile(path) + current = file_parser.parse() + checker = checks.getChecker( + File(path, path, locale=REFERENCE_LOCALE), + extra_tests=extra_tests + ) + if checker and checker.needs_reference: + checker.set_reference(current) + linter = EntityLinter(current, checker, reference) + for current_entity in current: + for result in linter.lint_entity(current_entity): + result['path'] = path + yield result + + +class EntityLinter(object): + '''Factored out helper to run linters on a single entity.''' + def __init__(self, current, checker, reference): + self.key_count = Counter(entity.key for entity in current) + self.checker = checker + self.reference = reference + + def lint_entity(self, current_entity): + res = self.handle_junk(current_entity) + if res: + yield res + return + for res in self.lint_full_entity(current_entity): + yield res + for res in self.lint_value(current_entity): + yield res + + def lint_full_entity(self, current_entity): + '''Checks that go good or bad for a full entity, + without a particular spot inside the entity. + ''' + lineno = col = None + if self.key_count[current_entity.key] > 1: + lineno, col = current_entity.position() + yield { + 'lineno': lineno, + 'column': col, + 'level': 'error', + 'message': 'Duplicate string with ID: {}'.format( + current_entity.key + ) + } + + if current_entity.key in self.reference: + reference_entity = self.reference[current_entity.key] + if not current_entity.equals(reference_entity): + if lineno is None: + lineno, col = current_entity.position() + msg = 'Changes to string require a new ID: {}'.format( + current_entity.key + ) + yield { + 'lineno': lineno, + 'column': col, + 'level': 'warning', + 'message': msg, + } + + def lint_value(self, current_entity): + '''Checks that error on particular locations in the entity value. + ''' + if self.checker: + for tp, pos, msg, cat in self.checker.check( + current_entity, current_entity + ): + if isinstance(pos, checks.EntityPos): + lineno, col = current_entity.position(pos) + else: + lineno, col = current_entity.value_position(pos) + yield { + 'lineno': lineno, + 'column': col, + 'level': tp, + 'message': msg, + } + + def handle_junk(self, current_entity): + if not isinstance(current_entity, parser.Junk): + return None + + lineno, col = current_entity.position() + return { + 'lineno': lineno, + 'column': col, + 'level': 'error', + 'message': current_entity.error_message() + } diff --git a/third_party/python/compare-locales/compare_locales/lint/util.py b/third_party/python/compare-locales/compare_locales/lint/util.py new file mode 100644 index 0000000000..0b2557dfdd --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/lint/util.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +from __future__ import absolute_import +from __future__ import unicode_literals + +from compare_locales import paths + + +def default_reference_and_tests(path): + return None, None + + +def mirror_reference_and_tests(files, basedir): + '''Get reference files to check for conflicts in android-l10n and friends. + ''' + def get_reference_and_tests(path): + for matchers in files.matchers: + if 'reference' not in matchers: + continue + matcher = matchers['reference'] + if matcher.match(path) is None: + continue + ref_matcher = paths.Matcher(matcher, root=basedir) + ref_path = matcher.sub(ref_matcher, path) + return ref_path, matchers.get('test') + return None, None + return get_reference_and_tests + + +def l10n_base_reference_and_tests(files): + '''Get reference files to check for conflicts in gecko-strings and friends. + ''' + def get_reference_and_tests(path): + match = files.match(path) + if match is None: + return None, None + ref, _, _, extra_tests = match + return ref, extra_tests + return get_reference_and_tests diff --git a/third_party/python/compare-locales/compare_locales/merge.py b/third_party/python/compare-locales/compare_locales/merge.py new file mode 100644 index 0000000000..9399e639e0 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/merge.py @@ -0,0 +1,143 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Merge resources across channels. + +Merging resources is done over a series of parsed resources, or source +strings. +The nomenclature is that the resources are ordered from newest to oldest. +The generated file structure is taken from the newest file, and then the +next-newest, etc. The values of the returned entities are taken from the +newest to the oldest resource, too. + +In merge_resources, there's an option to choose the values from oldest +to newest instead. +''' + +from collections import OrderedDict, defaultdict +from codecs import encode +import six + + +from compare_locales import parser as cl +from compare_locales.parser.base import StickyEntry +from compare_locales.compare.utils import AddRemove + + +class MergeNotSupportedError(ValueError): + pass + + +def merge_channels(name, resources): + try: + parser = cl.getParser(name) + except UserWarning: + raise MergeNotSupportedError( + 'Unsupported file format ({}).'.format(name)) + + entities = merge_resources(parser, resources) + return encode(serialize_legacy_resource(entities), parser.encoding) + + +def merge_resources(parser, resources, keep_newest=True): + '''Merge parsed or unparsed resources, returning a enumerable of Entities. + + Resources are ordered from newest to oldest in the input. The structure + of the generated content is taken from the newest resource first, and + then filled by the next etc. + Values are also taken from the newest, unless keep_newest is False, + then values are taken from the oldest first. + ''' + + def parse_resource(resource): + # The counter dict keeps track of number of identical comments. 
+ counter = defaultdict(int) + if isinstance(resource, bytes): + parser.readContents(resource) + resource = parser.walk() + pairs = [get_key_value(entity, counter) for entity in resource] + return OrderedDict(pairs) + + def get_key_value(entity, counter): + if isinstance(entity, cl.Comment): + counter[entity.val] += 1 + # Use the (value, index) tuple as the key. AddRemove will + # de-deplicate identical comments at the same index. + return ((entity.val, counter[entity.val]), entity) + + if isinstance(entity, cl.Whitespace): + # Use the Whitespace instance as the key so that it's always + # unique. Adjecent whitespace will be folded into the longer one in + # prune. + return (entity, entity) + + return (entity.key, entity) + + entities = six.moves.reduce( + lambda x, y: merge_two(x, y, keep_newer=keep_newest), + map(parse_resource, resources)) + return entities.values() + + +def merge_two(newer, older, keep_newer=True): + '''Merge two OrderedDicts. + + The order of the result dict is determined by `newer`. + The values in the dict are the newer ones by default, too. + If `keep_newer` is False, the values will be taken from the older + dict. + ''' + diff = AddRemove() + diff.set_left(newer.keys()) + diff.set_right(older.keys()) + + # Create a flat sequence of all entities in order reported by AddRemove. + get_entity = get_newer_entity if keep_newer else get_older_entity + contents = [(key, get_entity(newer, older, key)) for _, key in diff] + + def prune(acc, cur): + _, entity = cur + if entity is None: + # Prune Nones which stand for duplicated comments. + return acc + + if len(acc) and isinstance(entity, cl.Whitespace): + _, prev_entity = acc[-1] + + if isinstance(prev_entity, cl.Whitespace): + # Prefer the longer whitespace. + if len(entity.all) > len(prev_entity.all): + acc[-1] = (entity, entity) + return acc + + acc.append(cur) + return acc + + pruned = six.moves.reduce(prune, contents, []) + return OrderedDict(pruned) + + +def get_newer_entity(newer, older, key): + entity = newer.get(key, None) + + # Always prefer the newer version. + if entity is not None: + return entity + + return older.get(key) + + +def get_older_entity(newer, older, key): + entity = older.get(key, None) + + # If we don't have an older version, or it's a StickyEntry, + # get a newer version + if entity is None or isinstance(entity, StickyEntry): + return newer.get(key) + + return entity + + +def serialize_legacy_resource(entities): + return "".join((entity.all for entity in entities)) diff --git a/third_party/python/compare-locales/compare_locales/mozpath.py b/third_party/python/compare-locales/compare_locales/mozpath.py new file mode 100644 index 0000000000..7280e25279 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/mozpath.py @@ -0,0 +1,155 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +''' +Like :py:mod:`os.path`, with a reduced set of functions, and with normalized +path separators (always use forward slashes). +Also contains a few additional utilities not found in :py:mod:`os.path`. +''' + +from __future__ import absolute_import + +import posixpath +import os +import re + + +def normsep(path): + ''' + Normalize path separators, by using forward slashes instead of whatever + :py:const:`os.sep` is. 
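A hedged illustration of `merge_channels` above, with an invented file name and strings; resources are passed newest first.

```python
from compare_locales.merge import merge_channels

newest = b"brand.title = Firefox\n"
oldest = b"brand.title = Firefox Browser\nbrand.vendor = Mozilla\n"
merged = merge_channels("brand.properties", [newest, oldest])
# `merged` is a bytes serialization: brand.title keeps the value from the
# newest resource, and brand.vendor is carried over from the older one
# because the newest no longer defines it.
```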
+ ''' + if os.sep != '/': + path = path.replace(os.sep, '/') + if os.altsep and os.altsep != '/': + path = path.replace(os.altsep, '/') + return path + + +def relpath(path, start): + rel = normsep(os.path.relpath(path, start)) + return '' if rel == '.' else rel + + +def realpath(path): + return normsep(os.path.realpath(path)) + + +def abspath(path): + return normsep(os.path.abspath(path)) + + +def join(*paths): + return normsep(os.path.join(*paths)) + + +def normpath(path): + return posixpath.normpath(normsep(path)) + + +def dirname(path): + return posixpath.dirname(normsep(path)) + + +def commonprefix(paths): + return posixpath.commonprefix([normsep(path) for path in paths]) + + +def basename(path): + return os.path.basename(path) + + +def splitext(path): + return posixpath.splitext(normsep(path)) + + +def split(path): + ''' + Return the normalized path as a list of its components. + + ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']`` + ''' + return normsep(path).split('/') + + +def basedir(path, bases): + ''' + Given a list of directories (`bases`), return which one contains the given + path. If several matches are found, the deepest base directory is returned. + + ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'`` + (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match) + ''' + path = normsep(path) + bases = [normsep(b) for b in bases] + if path in bases: + return path + for b in sorted(bases, reverse=True): + if b == '' or path.startswith(b + '/'): + return b + + +re_cache = {} + + +def match(path, pattern): + ''' + Return whether the given path matches the given pattern. + An asterisk can be used to match any string, including the null string, in + one part of the path: + + ``foo`` matches ``*``, ``f*`` or ``fo*o`` + + However, an asterisk matching a subdirectory may not match the null string: + + ``foo/bar`` does *not* match ``foo/*/bar`` + + If the pattern matches one of the ancestor directories of the path, the + patch is considered matching: + + ``foo/bar`` matches ``foo`` + + Two adjacent asterisks can be used to match files and zero or more + directories and subdirectories. + + ``foo/bar`` matches ``foo/**/bar``, or ``**/bar`` + ''' + if not pattern: + return True + if pattern not in re_cache: + last_end = 0 + p = '' + for m in re.finditer(r'(?:(^|/)\*\*(/|$))|(?P<star>\*)', pattern): + if m.start() > last_end: + p += re.escape(pattern[last_end:m.start()]) + if m.group('star'): + p += '[^/]*' + elif m.group(2): + p += re.escape(m.group(1)) + r'(?:.+%s)?' % m.group(2) + else: + p += r'(?:%s.+)?' % re.escape(m.group(1)) + last_end = m.end() + p += re.escape(pattern[last_end:]) + '(?:/.*)?$' + re_cache[pattern] = re.compile(p) + return re_cache[pattern].match(path) is not None + + +def rebase(oldbase, base, relativepath): + ''' + Return `relativepath` relative to `base` instead of `oldbase`. 
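The pattern rules documented for `match()` above can be spot-checked with a few illustrative calls (paths invented):

```python
from compare_locales import mozpath

mozpath.match('foo/bar/baz.ftl', 'foo')            # True: ancestor directory matches
mozpath.match('foo/bar/baz.ftl', 'foo/*/baz.ftl')  # True: '*' spans one path segment
mozpath.match('foo/bar', 'foo/*/bar')              # False: this '*' may not be empty
mozpath.match('foo/x/y/bar', 'foo/**/bar')         # True: '**' spans any depth
```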
+ ''' + if base == oldbase: + return relativepath + if len(base) < len(oldbase): + assert basedir(oldbase, [base]) == base + relbase = relpath(oldbase, base) + result = join(relbase, relativepath) + else: + assert basedir(base, [oldbase]) == oldbase + relbase = relpath(base, oldbase) + result = relpath(relativepath, relbase) + result = normpath(result) + if relativepath.endswith('/') and not result.endswith('/'): + result += '/' + return result diff --git a/third_party/python/compare-locales/compare_locales/parser/__init__.py b/third_party/python/compare-locales/compare_locales/parser/__init__.py new file mode 100644 index 0000000000..8ab36cb082 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/__init__.py @@ -0,0 +1,83 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re + +from .base import ( + CAN_NONE, CAN_COPY, CAN_SKIP, CAN_MERGE, + Entry, Entity, Comment, OffsetComment, Junk, Whitespace, + BadEntity, Parser, +) +from .android import ( + AndroidParser +) +from .defines import ( + DefinesParser, DefinesInstruction +) +from .dtd import ( + DTDEntity, DTDParser +) +from .fluent import ( + FluentParser, FluentComment, FluentEntity, FluentMessage, FluentTerm, +) +from .ini import ( + IniParser, IniSection, +) +from .po import ( + PoParser +) +from .properties import ( + PropertiesParser, PropertiesEntity +) + +__all__ = [ + "CAN_NONE", "CAN_COPY", "CAN_SKIP", "CAN_MERGE", + "Junk", "Entry", "Entity", "Whitespace", "Comment", "OffsetComment", + "BadEntity", "Parser", + "AndroidParser", + "DefinesParser", "DefinesInstruction", + "DTDParser", "DTDEntity", + "FluentParser", "FluentComment", "FluentEntity", + "FluentMessage", "FluentTerm", + "IniParser", "IniSection", + "PoParser", + "PropertiesParser", "PropertiesEntity", +] + +__constructors = [] + + +def getParser(path): + for item in __constructors: + if re.search(item[0], path): + return item[1] + try: + from pkg_resources import iter_entry_points + for entry_point in iter_entry_points('compare_locales.parsers'): + p = entry_point.resolve()() + if p.use(path): + return p + except (ImportError, IOError): + pass + raise UserWarning("Cannot find Parser") + + +def hasParser(path): + try: + return bool(getParser(path)) + except UserWarning: + return False + + +__constructors = [ + ('strings.*\\.xml$', AndroidParser()), + ('\\.dtd$', DTDParser()), + ('\\.properties$', PropertiesParser()), + ('\\.ini$', IniParser()), + ('\\.inc$', DefinesParser()), + ('\\.ftl$', FluentParser()), + ('\\.pot?$', PoParser()), +] diff --git a/third_party/python/compare-locales/compare_locales/parser/android.py b/third_party/python/compare-locales/compare_locales/parser/android.py new file mode 100644 index 0000000000..a6ad2f5bff --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/android.py @@ -0,0 +1,305 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Android strings.xml parser + +Parses strings.xml files per +https://developer.android.com/guide/topics/resources/localization. +As we're using a built-in XML parser underneath, errors on that level +break the full parsing, and result in a single Junk entry. 
+""" + +from __future__ import absolute_import +from __future__ import unicode_literals + +import re +from xml.dom import minidom +from xml.dom.minidom import Node + +from .base import ( + CAN_SKIP, + Entity, Comment, Junk, Whitespace, + StickyEntry, LiteralEntity, + Parser +) + + +class AndroidEntity(Entity): + def __init__( + self, ctx, pre_comment, white_space, node, all, key, raw_val, val + ): + # fill out superclass as good as we can right now + # most span can get modified at endElement + super(AndroidEntity, self).__init__( + ctx, pre_comment, white_space, + (None, None), + (None, None), + (None, None) + ) + self.node = node + self._all_literal = all + self._key_literal = key + self._raw_val_literal = raw_val + self._val_literal = val + + @property + def all(self): + chunks = [] + if self.pre_comment is not None: + chunks.append(self.pre_comment.all) + if self.inner_white is not None: + chunks.append(self.inner_white.all) + chunks.append(self._all_literal) + return ''.join(chunks) + + @property + def key(self): + return self._key_literal + + @property + def raw_val(self): + return self._raw_val_literal + + def position(self, offset=0): + return (0, offset) + + def value_position(self, offset=0): + return (0, offset) + + def wrap(self, raw_val): + clone = self.node.cloneNode(True) + if clone.childNodes.length == 1: + child = clone.childNodes[0] + else: + for child in clone.childNodes: + if child.nodeType == Node.CDATA_SECTION_NODE: + break + child.data = raw_val + all = [] + if self.pre_comment is not None: + all.append(self.pre_comment.all) + if self.inner_white is not None: + all.append(self.inner_white.all) + all.append(clone.toxml()) + return LiteralEntity(self.key, raw_val, ''.join(all)) + + +class NodeMixin(object): + def __init__(self, all, value): + self._all_literal = all + self._val_literal = value + + @property + def all(self): + return self._all_literal + + @property + def key(self): + return self._all_literal + + @property + def raw_val(self): + return self._val_literal + + def position(self, offset=0): + return (0, offset) + + def value_position(self, offset=0): + return (0, offset) + + +class XMLWhitespace(NodeMixin, Whitespace): + pass + + +class XMLComment(NodeMixin, Comment): + @property + def val(self): + return self._val_literal + + @property + def key(self): + return None + + +# DocumentWrapper is sticky in serialization. +# Always keep the one from the reference document. 
+class DocumentWrapper(NodeMixin, StickyEntry): + def __init__(self, key, all): + self._all_literal = all + self._val_literal = all + self._key_literal = key + + @property + def key(self): + return self._key_literal + + +class XMLJunk(Junk): + def __init__(self, all): + super(XMLJunk, self).__init__(None, (0, 0)) + self._all_literal = all + + @property + def all(self): + return self._all_literal + + def position(self, offset=0): + return (0, offset) + + def value_position(self, offset=0): + return (0, offset) + + +def textContent(node): + if node.childNodes.length == 0: + return '' + for child in node.childNodes: + if child.nodeType == minidom.Node.CDATA_SECTION_NODE: + return child.data + if ( + node.childNodes.length != 1 or + node.childNodes[0].nodeType != minidom.Node.TEXT_NODE + ): + # Return something, we'll fail in checks on this + return node.toxml() + return node.childNodes[0].data + + +NEWLINE = re.compile(r'[ \t]*\n[ \t]*') + + +def normalize(val): + return NEWLINE.sub('\n', val.strip(' \t')) + + +class AndroidParser(Parser): + # Android does l10n fallback at runtime, don't merge en-US strings + capabilities = CAN_SKIP + + def __init__(self): + super(AndroidParser, self).__init__() + self.last_comment = None + + def walk(self, only_localizable=False): + if not self.ctx: + # loading file failed, or we just didn't load anything + return + ctx = self.ctx + contents = ctx.contents + try: + doc = minidom.parseString(contents.encode('utf-8')) + except Exception: + yield XMLJunk(contents) + return + docElement = doc.documentElement + if docElement.nodeName != 'resources': + yield XMLJunk(doc.toxml()) + return + root_children = docElement.childNodes + if not only_localizable: + yield DocumentWrapper( + '<?xml?><resources>', + '<?xml version="1.0" encoding="utf-8"?>\n<resources' + ) + for attr_name, attr_value in docElement.attributes.items(): + yield DocumentWrapper( + attr_name, + ' {}="{}"'.format(attr_name, attr_value) + ) + yield DocumentWrapper('>', '>') + child_num = 0 + while child_num < len(root_children): + node = root_children[child_num] + if node.nodeType == Node.COMMENT_NODE: + current_comment, child_num = self.handleComment( + node, root_children, child_num + ) + if child_num < len(root_children): + node = root_children[child_num] + else: + if not only_localizable: + yield current_comment + break + else: + current_comment = None + if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): + white_space = XMLWhitespace(node.toxml(), node.nodeValue) + child_num += 1 + if current_comment is None: + if not only_localizable: + yield white_space + continue + if node.nodeValue.count('\n') > 1: + if not only_localizable: + if current_comment is not None: + yield current_comment + yield white_space + continue + if child_num < len(root_children): + node = root_children[child_num] + else: + if not only_localizable: + if current_comment is not None: + yield current_comment + yield white_space + break + else: + white_space = None + if node.nodeType == Node.ELEMENT_NODE: + yield self.handleElement(node, current_comment, white_space) + else: + if not only_localizable: + if current_comment: + yield current_comment + if white_space: + yield white_space + child_num += 1 + if not only_localizable: + yield DocumentWrapper('</resources>', '</resources>\n') + + def handleElement(self, element, current_comment, white_space): + if element.nodeName == 'string' and element.hasAttribute('name'): + return AndroidEntity( + self.ctx, + current_comment, + white_space, + element, + element.toxml(), + 
element.getAttribute('name'), + textContent(element), + ''.join(c.toxml() for c in element.childNodes) + ) + else: + return XMLJunk(element.toxml()) + + def handleComment(self, node, root_children, child_num): + all = node.toxml() + val = normalize(node.nodeValue) + while True: + child_num += 1 + if child_num >= len(root_children): + break + node = root_children[child_num] + if node.nodeType == Node.TEXT_NODE: + if node.nodeValue.count('\n') > 1: + break + white = node + child_num += 1 + if child_num >= len(root_children): + break + node = root_children[child_num] + else: + white = None + if node.nodeType != Node.COMMENT_NODE: + if white is not None: + # do not consume this node + child_num -= 1 + break + if white: + all += white.toxml() + val += normalize(white.nodeValue) + all += node.toxml() + val += normalize(node.nodeValue) + return XMLComment(all, val), child_num diff --git a/third_party/python/compare-locales/compare_locales/parser/base.py b/third_party/python/compare-locales/compare_locales/parser/base.py new file mode 100644 index 0000000000..efc6119222 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/base.py @@ -0,0 +1,451 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re +import bisect +import codecs +from collections import Counter +from compare_locales.keyedtuple import KeyedTuple +from compare_locales.paths import File + +import six + +__constructors = [] + + +# The allowed capabilities for the Parsers. They define the exact strategy +# used by ContentComparer.merge. + +# Don't perform any merging +CAN_NONE = 0 +# Copy the entire reference file +CAN_COPY = 1 +# Remove broken entities from localization +# Without CAN_MERGE, en-US is not good to use for localization. +CAN_SKIP = 2 +# Add missing and broken entities from the reference to localization +# This effectively means that en-US is good to use for localized files. +CAN_MERGE = 4 + + +class Entry(object): + ''' + Abstraction layer for a localizable entity. + Currently supported are grammars of the form: + + 1: entity definition + 2: entity key (name) + 3: entity value + + <!ENTITY key "value"> + + <--- definition ----> + ''' + def __init__( + self, ctx, pre_comment, inner_white, span, key_span, val_span + ): + self.ctx = ctx + self.span = span + self.key_span = key_span + self.val_span = val_span + self.pre_comment = pre_comment + self.inner_white = inner_white + + def position(self, offset=0): + """Get the 1-based line and column of the character + with given offset into the Entity. + + If offset is negative, return the end of the Entity. + """ + if offset < 0: + pos = self.span[1] + else: + pos = self.span[0] + offset + return self.ctx.linecol(pos) + + def value_position(self, offset=0): + """Get the 1-based line and column of the character + with given offset into the value. + + If offset is negative, return the end of the value. 
+ """ + assert self.val_span is not None + if offset < 0: + pos = self.val_span[1] + else: + pos = self.val_span[0] + offset + return self.ctx.linecol(pos) + + def _span_start(self): + start = self.span[0] + if hasattr(self, 'pre_comment') and self.pre_comment is not None: + start = self.pre_comment.span[0] + return start + + @property + def all(self): + start = self._span_start() + end = self.span[1] + return self.ctx.contents[start:end] + + @property + def key(self): + return self.ctx.contents[self.key_span[0]:self.key_span[1]] + + @property + def raw_val(self): + if self.val_span is None: + return None + return self.ctx.contents[self.val_span[0]:self.val_span[1]] + + @property + def val(self): + return self.raw_val + + def __repr__(self): + return self.key + + re_br = re.compile('<br[ \t\r\n]*/?>', re.U) + re_sgml = re.compile(r'</?\w+.*?>', re.U | re.M) + + def count_words(self): + """Count the words in an English string. + Replace a couple of xml markup to make that safer, too. + """ + value = self.re_br.sub('\n', self.val) + value = self.re_sgml.sub('', value) + return len(value.split()) + + def equals(self, other): + return self.key == other.key and self.val == other.val + + +class StickyEntry(Entry): + """Subclass of Entry to use in for syntax fragments + which should always be overwritten in the serializer. + """ + pass + + +class Entity(Entry): + @property + def localized(self): + '''Is this entity localized. + + Always true for monolingual files. + In bilingual files, this is a dynamic property. + ''' + return True + + def unwrap(self): + """Return the literal value to be used by tools. + """ + return self.raw_val + + def wrap(self, raw_val): + """Create literal entity based on reference and raw value. + + This is used by the serialization logic. + """ + start = self._span_start() + all = ( + self.ctx.contents[start:self.val_span[0]] + + raw_val + + self.ctx.contents[self.val_span[1]:self.span[1]] + ) + return LiteralEntity(self.key, raw_val, all) + + +class LiteralEntity(Entity): + """Subclass of Entity to represent entities without context slices. + + It's storing string literals for key, raw_val and all instead of spans. + """ + def __init__(self, key, val, all): + super(LiteralEntity, self).__init__(None, None, None, None, None, None) + self._key = key + self._raw_val = val + self._all = all + + @property + def key(self): + return self._key + + @property + def raw_val(self): + return self._raw_val + + @property + def all(self): + return self._all + + +class PlaceholderEntity(LiteralEntity): + """Subclass of Entity to be removed in merges. + """ + def __init__(self, key): + super(PlaceholderEntity, self).__init__(key, "", "\nplaceholder\n") + + +class Comment(Entry): + def __init__(self, ctx, span): + self.ctx = ctx + self.span = span + self.val_span = None + self._val_cache = None + + @property + def key(self): + return None + + @property + def val(self): + if self._val_cache is None: + self._val_cache = self.all + return self._val_cache + + def __repr__(self): + return self.all + + +class OffsetComment(Comment): + '''Helper for file formats that have a constant number of leading + chars to strip from comments. + Offset defaults to 1 + ''' + comment_offset = 1 + + @property + def val(self): + if self._val_cache is None: + self._val_cache = ''.join(( + l[self.comment_offset:] for l in self.all.splitlines(True) + )) + return self._val_cache + + +class Junk(object): + ''' + An almost-Entity, representing junk data that we didn't parse. 
+ This way, we can signal bad content as stuff we don't understand. + And the either fix that, or report real bugs in localizations. + ''' + junkid = 0 + + def __init__(self, ctx, span): + self.ctx = ctx + self.span = span + self.__class__.junkid += 1 + self.key = '_junk_%d_%d-%d' % (self.__class__.junkid, span[0], span[1]) + + def position(self, offset=0): + """Get the 1-based line and column of the character + with given offset into the Entity. + + If offset is negative, return the end of the Entity. + """ + if offset < 0: + pos = self.span[1] + else: + pos = self.span[0] + offset + return self.ctx.linecol(pos) + + @property + def all(self): + return self.ctx.contents[self.span[0]:self.span[1]] + + @property + def raw_val(self): + return self.all + + @property + def val(self): + return self.all + + def error_message(self): + params = (self.val,) + self.position() + self.position(-1) + return ( + 'Unparsed content "%s" from line %d column %d' + ' to line %d column %d' % params + ) + + def __repr__(self): + return self.key + + +class Whitespace(Entry): + '''Entity-like object representing an empty file with whitespace, + if allowed + ''' + def __init__(self, ctx, span): + self.ctx = ctx + self.span = self.key_span = self.val_span = span + + def __repr__(self): + return self.raw_val + + +class BadEntity(ValueError): + '''Raised when the parser can't create an Entity for a found match. + ''' + pass + + +class Parser(object): + capabilities = CAN_SKIP | CAN_MERGE + reWhitespace = re.compile('[ \t\r\n]+', re.M) + Comment = Comment + # NotImplementedError would be great, but also tedious + reKey = reComment = None + + class Context(object): + "Fixture for content and line numbers" + def __init__(self, contents): + self.contents = contents + # cache split lines + self._lines = None + + def linecol(self, position): + "Returns 1-based line and column numbers." + if self._lines is None: + nl = re.compile('\n', re.M) + self._lines = [m.end() + for m in nl.finditer(self.contents)] + + line_offset = bisect.bisect(self._lines, position) + line_start = self._lines[line_offset - 1] if line_offset else 0 + col_offset = position - line_start + + return line_offset + 1, col_offset + 1 + + def __init__(self): + if not hasattr(self, 'encoding'): + self.encoding = 'utf-8' + self.ctx = None + + def readFile(self, file): + '''Read contents from disk, with universal_newlines''' + if isinstance(file, File): + file = file.fullpath + # python 2 has binary input with universal newlines, + # python 3 doesn't. Let's split code paths + if six.PY2: + with open(file, 'rbU') as f: + self.readContents(f.read()) + else: + with open( + file, 'r', + encoding=self.encoding, errors='replace', + newline=None + ) as f: + self.readUnicode(f.read()) + + def readContents(self, contents): + '''Read contents and create parsing context. + + contents are in native encoding, but with normalized line endings. 
+ ''' + (contents, _) = codecs.getdecoder(self.encoding)(contents, 'replace') + self.readUnicode(contents) + + def readUnicode(self, contents): + self.ctx = self.Context(contents) + + def parse(self): + return KeyedTuple(self) + + def __iter__(self): + return self.walk(only_localizable=True) + + def walk(self, only_localizable=False): + if not self.ctx: + # loading file failed, or we just didn't load anything + return + ctx = self.ctx + contents = ctx.contents + + next_offset = 0 + while next_offset < len(contents): + entity = self.getNext(ctx, next_offset) + + if isinstance(entity, (Entity, Junk)): + yield entity + elif not only_localizable: + yield entity + + next_offset = entity.span[1] + + def getNext(self, ctx, offset): + '''Parse the next fragment. + + Parse comments first, then white-space. + If an entity follows, create that entity with such pre_comment and + inner white-space. If not, emit comment or white-space as standlone. + It's OK that this might parse whitespace more than once. + Comments are associated with entities if they're not separated by + blank lines. Multiple consecutive comments are joined. + ''' + junk_offset = offset + m = self.reComment.match(ctx.contents, offset) + if m: + current_comment = self.Comment(ctx, m.span()) + if offset < 2 and 'License' in current_comment.val: + # Heuristic. A early comment with "License" is probably + # a license header, and should be standalone. + # Not glueing ourselves to offset == 0 as we might have + # skipped a BOM. + return current_comment + offset = m.end() + else: + current_comment = None + m = self.reWhitespace.match(ctx.contents, offset) + if m: + white_space = Whitespace(ctx, m.span()) + offset = m.end() + if ( + current_comment is not None + and white_space.raw_val.count('\n') > 1 + ): + # standalone comment + # return the comment, and reparse the whitespace next time + return current_comment + if current_comment is None: + return white_space + else: + white_space = None + m = self.reKey.match(ctx.contents, offset) + if m: + try: + return self.createEntity(ctx, m, current_comment, white_space) + except BadEntity: + # fall through to Junk, probably + pass + if current_comment is not None: + return current_comment + if white_space is not None: + return white_space + return self.getJunk(ctx, junk_offset, self.reKey, self.reComment) + + def getJunk(self, ctx, offset, *expressions): + junkend = None + for exp in expressions: + m = exp.search(ctx.contents, offset) + if m: + junkend = min(junkend, m.start()) if junkend else m.start() + return Junk(ctx, (offset, junkend or len(ctx.contents))) + + def createEntity(self, ctx, m, current_comment, white_space): + return Entity( + ctx, current_comment, white_space, + m.span(), m.span('key'), m.span('val') + ) + + @classmethod + def findDuplicates(cls, entities): + found = Counter(entity.key for entity in entities) + for entity_id, cnt in found.items(): + if cnt > 1: + yield '{} occurs {} times'.format(entity_id, cnt) diff --git a/third_party/python/compare-locales/compare_locales/parser/defines.py b/third_party/python/compare-locales/compare_locales/parser/defines.py new file mode 100644 index 0000000000..45b5be0530 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/defines.py @@ -0,0 +1,106 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import +from __future__ import unicode_literals +import re + +from .base import ( + CAN_COPY, + Entry, OffsetComment, Junk, Whitespace, + Parser +) + + +class DefinesInstruction(Entry): + '''Entity-like object representing processing instructions in inc files + ''' + def __init__(self, ctx, span, val_span): + self.ctx = ctx + self.span = span + self.key_span = self.val_span = val_span + + def __repr__(self): + return self.raw_val + + +class DefinesParser(Parser): + # can't merge, #unfilter needs to be the last item, which we don't support + capabilities = CAN_COPY + reWhitespace = re.compile('\n+', re.M) + + EMPTY_LINES = 1 << 0 + + class Comment(OffsetComment): + comment_offset = 2 + + class Context(Parser.Context): + def __init__(self, contents): + super(DefinesParser.Context, self).__init__(contents) + self.filter_empty_lines = False + + def __init__(self): + self.reComment = re.compile('(?:^# .*?\n)*(?:^# [^\n]*)', re.M) + # corresponds to + # https://hg.mozilla.org/mozilla-central/file/72ee4800d4156931c89b58bd807af4a3083702bb/python/mozbuild/mozbuild/preprocessor.py#l561 # noqa + self.reKey = re.compile( + r'#define[ \t]+(?P<key>\w+)(?:[ \t](?P<val>[^\n]*))?', re.M) + self.rePI = re.compile(r'#(?P<val>\w+[ \t]+[^\n]+)', re.M) + Parser.__init__(self) + + def getNext(self, ctx, offset): + junk_offset = offset + contents = ctx.contents + + m = self.reComment.match(ctx.contents, offset) + if m: + current_comment = self.Comment(ctx, m.span()) + offset = m.end() + else: + current_comment = None + + m = self.reWhitespace.match(contents, offset) + if m: + # blank lines outside of filter_empty_lines or + # leading whitespace are bad + if ( + offset == 0 or + not (len(m.group()) == 1 or ctx.filter_empty_lines) + ): + if current_comment: + return current_comment + return Junk(ctx, m.span()) + white_space = Whitespace(ctx, m.span()) + offset = m.end() + if ( + current_comment is not None + and white_space.raw_val.count('\n') > 1 + ): + # standalone comment + # return the comment, and reparse the whitespace next time + return current_comment + if current_comment is None: + return white_space + else: + white_space = None + + m = self.reKey.match(contents, offset) + if m: + return self.createEntity(ctx, m, current_comment, white_space) + # defines instructions don't have comments + # Any pending commment is standalone + if current_comment: + return current_comment + if white_space: + return white_space + m = self.rePI.match(contents, offset) + if m: + instr = DefinesInstruction(ctx, m.span(), m.span('val')) + if instr.val == 'filter emptyLines': + ctx.filter_empty_lines = True + if instr.val == 'unfilter emptyLines': + ctx.filter_empty_lines = False + return instr + return self.getJunk( + ctx, junk_offset, self.reComment, self.reKey, self.rePI) diff --git a/third_party/python/compare-locales/compare_locales/parser/dtd.py b/third_party/python/compare-locales/compare_locales/parser/dtd.py new file mode 100644 index 0000000000..5f0574f488 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/dtd.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
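A small hedged sketch of the `DefinesParser` above (contents invented); `getParser` selects it for `.inc` paths via the suffix table shown earlier.

```python
from compare_locales import parser

p = parser.getParser('defines.inc')
p.readContents(b'#define MOZ_LANGPACK_CREATOR mozilla.org\n')
entities = p.parse()
[(e.key, e.raw_val) for e in entities]
# [('MOZ_LANGPACK_CREATOR', 'mozilla.org')]
```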
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+import re
+
+try:
+    from html import unescape as html_unescape
+except ImportError:
+    from HTMLParser import HTMLParser
+    html_parser = HTMLParser()
+    html_unescape = html_parser.unescape
+
+from .base import (
+    Entity, Comment, Junk,
+    Parser
+)
+
+
+class DTDEntityMixin(object):
+    @property
+    def val(self):
+        '''Unescape HTML entities into corresponding Unicode characters.
+
+        Named (&amp;), decimal (&#38;), and hex (&#x26; and &#X26;) formats
+        are supported. Unknown entities are left intact.
+
+        As of Python 2.7 and Python 3.6 the following 252 named entities are
+        recognized and unescaped:
+
+        https://github.com/python/cpython/blob/2.7/Lib/htmlentitydefs.py
+        https://github.com/python/cpython/blob/3.6/Lib/html/entities.py
+        '''
+        return html_unescape(self.raw_val)
+
+    def value_position(self, offset=0):
+        # DTDChecker already returns tuples of (line, col) positions
+        if isinstance(offset, tuple):
+            line_pos, col_pos = offset
+            line, col = super(DTDEntityMixin, self).value_position()
+            if line_pos == 1:
+                col = col + col_pos
+            else:
+                col = col_pos
+                line += line_pos - 1
+            return line, col
+        else:
+            return super(DTDEntityMixin, self).value_position(offset)
+
+
+class DTDEntity(DTDEntityMixin, Entity):
+    pass
+
+
+class DTDParser(Parser):
+    # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
+    # ":" | [A-Z] | "_" | [a-z] |
+    # [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
+    # | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
+    # [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
+    # [#x10000-#xEFFFF]
+    CharMinusDash = '\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
+    XmlComment = '<!--(?:-?[%s])*?-->' % CharMinusDash
+    NameStartChar = ':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
+        '\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
+        '\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
+    # + \U00010000-\U000EFFFF seems to be unsupported in python
+
+    # NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
+    #     [#x0300-#x036F] | [#x203F-#x2040]
+    NameChar = NameStartChar + r'\-\.0-9' + '\xB7\u0300-\u036F\u203F-\u2040'
+    Name = '[' + NameStartChar + '][' + NameChar + ']*'
+    reKey = re.compile('<!ENTITY[ \t\r\n]+(?P<key>' + Name + ')[ \t\r\n]+'
+                       '(?P<val>\"[^\"]*\"|\'[^\']*\'?)[ \t\r\n]*>',
+                       re.DOTALL | re.M)
+    # add BOM to DTDs, details in bug 435002
+    reHeader = re.compile('^\ufeff')
+    reComment = re.compile('<!--(?P<val>-?[%s])*?-->' % CharMinusDash,
+                           re.S)
+    rePE = re.compile('<!ENTITY[ \t\r\n]+%[ \t\r\n]+(?P<key>' + Name + ')'
+                      '[ \t\r\n]+SYSTEM[ \t\r\n]+'
+                      '(?P<val>\"[^\"]*\"|\'[^\']*\')[ \t\r\n]*>[ \t\r\n]*'
+                      '%' + Name + ';'
+                      '(?:[ \t]*(?:' + XmlComment + u'[ \t\r\n]*)*\n?)?')
+
+    class Comment(Comment):
+        @property
+        def val(self):
+            if self._val_cache is None:
+                # Strip "<!--" and "-->" to comment contents
+                self._val_cache = self.all[4:-3]
+            return self._val_cache
+
+    def getNext(self, ctx, offset):
+        '''
+        Overload Parser.getNext to special-case ParsedEntities.
+        Just check for a parsed entity if that method claims junk.
+ + <!ENTITY % foo SYSTEM "url"> + %foo; + ''' + if offset == 0 and self.reHeader.match(ctx.contents): + offset += 1 + entity = Parser.getNext(self, ctx, offset) + if (entity and isinstance(entity, Junk)) or entity is None: + m = self.rePE.match(ctx.contents, offset) + if m: + entity = DTDEntity( + ctx, None, None, m.span(), m.span('key'), m.span('val')) + return entity + + def createEntity(self, ctx, m, current_comment, white_space): + valspan = m.span('val') + valspan = (valspan[0]+1, valspan[1]-1) + return DTDEntity(ctx, current_comment, white_space, + m.span(), m.span('key'), valspan) diff --git a/third_party/python/compare-locales/compare_locales/parser/fluent.py b/third_party/python/compare-locales/compare_locales/parser/fluent.py new file mode 100644 index 0000000000..19d7d3c0b9 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/fluent.py @@ -0,0 +1,220 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re + +from fluent.syntax import FluentParser as FTLParser +from fluent.syntax import ast as ftl +from fluent.syntax.serializer import serialize_comment +from fluent.syntax.visitor import Visitor +from .base import ( + CAN_SKIP, + Entry, Entity, Comment, Junk, Whitespace, + LiteralEntity, + Parser +) + + +class WordCounter(Visitor): + def __init__(self): + self.word_count = 0 + + def generic_visit(self, node): + if isinstance( + node, + (ftl.Span, ftl.Annotation, ftl.BaseComment) + ): + return + super(WordCounter, self).generic_visit(node) + + def visit_SelectExpression(self, node): + # optimize select expressions to only go through the variants + self.visit(node.variants) + + def visit_TextElement(self, node): + self.word_count += len(node.value.split()) + + +class FluentAttribute(Entry): + ignored_fields = ['span'] + + def __init__(self, entity, attr_node): + self.ctx = entity.ctx + self.attr = attr_node + self.key_span = (attr_node.id.span.start, attr_node.id.span.end) + self.val_span = (attr_node.value.span.start, attr_node.value.span.end) + + def equals(self, other): + if not isinstance(other, FluentAttribute): + return False + return self.attr.equals( + other.attr, ignored_fields=self.ignored_fields) + + +class FluentEntity(Entity): + # Fields ignored when comparing two entities. + ignored_fields = ['comment', 'span'] + + def __init__(self, ctx, entry): + start = entry.span.start + end = entry.span.end + + self.ctx = ctx + self.span = (start, end) + + if isinstance(entry, ftl.Term): + # Terms don't have their '-' as part of the id, use the prior + # character + self.key_span = (entry.id.span.start - 1, entry.id.span.end) + else: + # Message + self.key_span = (entry.id.span.start, entry.id.span.end) + + if entry.value is not None: + self.val_span = (entry.value.span.start, entry.value.span.end) + else: + self.val_span = None + + self.entry = entry + + # Entry instances are expected to have pre_comment. It's used by + # other formats to associate a Comment with an Entity. FluentEntities + # don't need it because message comments are part of the entry AST and + # are not separate Comment instances. + self.pre_comment = None + + @property + def root_node(self): + '''AST node at which to start traversal for count_words. + + By default we count words in the value and in all attributes. 
+ ''' + return self.entry + + _word_count = None + + def count_words(self): + if self._word_count is None: + counter = WordCounter() + counter.visit(self.root_node) + self._word_count = counter.word_count + + return self._word_count + + def equals(self, other): + return self.entry.equals( + other.entry, ignored_fields=self.ignored_fields) + + # In Fluent we treat entries as a whole. FluentChecker reports errors at + # offsets calculated from the beginning of the entry. + def value_position(self, offset=None): + if offset is None: + # no offset given, use our value start or id end + if self.val_span: + offset = self.val_span[0] - self.span[0] + else: + offset = self.key_span[1] - self.span[0] + return self.position(offset) + + @property + def attributes(self): + for attr_node in self.entry.attributes: + yield FluentAttribute(self, attr_node) + + def unwrap(self): + return self.all + + def wrap(self, raw_val): + """Create literal entity the given raw value. + + For Fluent, we're exposing the message source to tools like + Pontoon. + We also recreate the comment from this entity to the created entity. + """ + all = raw_val + if self.entry.comment is not None: + all = serialize_comment(self.entry.comment) + all + return LiteralEntity(self.key, raw_val, all) + + +class FluentMessage(FluentEntity): + pass + + +class FluentTerm(FluentEntity): + # Fields ignored when comparing two terms. + ignored_fields = ['attributes', 'comment', 'span'] + + @property + def root_node(self): + '''AST node at which to start traversal for count_words. + + In Fluent Terms we only count words in the value. Attributes are + private and do not count towards the word total. + ''' + return self.entry.value + + +class FluentComment(Comment): + def __init__(self, ctx, span, entry): + super(FluentComment, self).__init__(ctx, span) + self._val_cache = entry.content + + +class FluentParser(Parser): + capabilities = CAN_SKIP + + def __init__(self): + super(FluentParser, self).__init__() + self.ftl_parser = FTLParser() + + def walk(self, only_localizable=False): + if not self.ctx: + # loading file failed, or we just didn't load anything + return + + resource = self.ftl_parser.parse(self.ctx.contents) + + last_span_end = 0 + + for entry in resource.body: + if not only_localizable: + if entry.span.start > last_span_end: + yield Whitespace( + self.ctx, (last_span_end, entry.span.start)) + + if isinstance(entry, ftl.Message): + yield FluentMessage(self.ctx, entry) + elif isinstance(entry, ftl.Term): + yield FluentTerm(self.ctx, entry) + elif isinstance(entry, ftl.Junk): + start = entry.span.start + end = entry.span.end + # strip leading whitespace + start += re.match('[ \t\r\n]*', entry.content).end() + if not only_localizable and entry.span.start < start: + yield Whitespace( + self.ctx, (entry.span.start, start) + ) + # strip trailing whitespace + ws, we = re.search('[ \t\r\n]*$', entry.content).span() + end -= we - ws + yield Junk(self.ctx, (start, end)) + if not only_localizable and end < entry.span.end: + yield Whitespace( + self.ctx, (end, entry.span.end) + ) + elif isinstance(entry, ftl.BaseComment) and not only_localizable: + span = (entry.span.start, entry.span.end) + yield FluentComment(self.ctx, span, entry) + + last_span_end = entry.span.end + + # Yield Whitespace at the EOF. 
+ if not only_localizable: + eof_offset = len(self.ctx.contents) + if eof_offset > last_span_end: + yield Whitespace(self.ctx, (last_span_end, eof_offset)) diff --git a/third_party/python/compare-locales/compare_locales/parser/ini.py b/third_party/python/compare-locales/compare_locales/parser/ini.py new file mode 100644 index 0000000000..d3b31d9ae9 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/ini.py @@ -0,0 +1,58 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import re + +from .base import ( + Entry, OffsetComment, + Parser +) + + +class IniSection(Entry): + '''Entity-like object representing sections in ini files + ''' + def __init__(self, ctx, span, val_span): + self.ctx = ctx + self.span = span + self.key_span = self.val_span = val_span + + def __repr__(self): + return self.raw_val + + +class IniParser(Parser): + ''' + Parse files of the form: + # initial comment + [cat] + whitespace* + #comment + string=value + ... + ''' + + Comment = OffsetComment + + def __init__(self): + self.reComment = re.compile('(?:^[;#][^\n]*\n)*(?:^[;#][^\n]*)', re.M) + self.reSection = re.compile(r'\[(?P<val>.*?)\]', re.M) + self.reKey = re.compile('(?P<key>.+?)=(?P<val>.*)', re.M) + Parser.__init__(self) + + def getNext(self, ctx, offset): + contents = ctx.contents + m = self.reSection.match(contents, offset) + if m: + return IniSection(ctx, m.span(), m.span('val')) + + return super(IniParser, self).getNext(ctx, offset) + + def getJunk(self, ctx, offset, *expressions): + # base.Parser.getNext calls us with self.reKey, self.reComment. + # Add self.reSection to the end-of-junk expressions + expressions = expressions + (self.reSection,) + return super(IniParser, self).getJunk(ctx, offset, *expressions) diff --git a/third_party/python/compare-locales/compare_locales/parser/po.py b/third_party/python/compare-locales/compare_locales/parser/po.py new file mode 100644 index 0000000000..5880cf7c71 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/po.py @@ -0,0 +1,127 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Gettext PO(T) parser + +Parses gettext po and pot files. 
+""" + +from __future__ import absolute_import +from __future__ import unicode_literals + +import re + +from .base import ( + CAN_SKIP, + Entity, + BadEntity, + Parser +) + + +class PoEntityMixin(object): + + @property + def val(self): + return ( + self.stringlist_val + if self.stringlist_val + else self.stringlist_key[0] + ) + + @property + def key(self): + return self.stringlist_key + + @property + def localized(self): + # gettext denotes a non-localized string by an empty value + return bool(self.stringlist_val) + + def __repr__(self): + return self.key[0] + + +class PoEntity(PoEntityMixin, Entity): + pass + + +# Unescape and concat a string list +def eval_stringlist(lines): + return ''.join( + ( + l + .replace(r'\\', '\\') + .replace(r'\t', '\t') + .replace(r'\r', '\r') + .replace(r'\n', '\n') + .replace(r'\"', '"') + ) + for l in lines + ) + + +class PoParser(Parser): + # gettext l10n fallback at runtime, don't merge en-US strings + capabilities = CAN_SKIP + + reKey = re.compile('msgctxt|msgid') + reValue = re.compile('(?P<white>[ \t\r\n]*)(?P<cmd>msgstr)') + reComment = re.compile(r'(?:#.*?\n)+') + # string list item: + # leading whitespace + # `"` + # escaped quotes etc, not quote, newline, backslash + # `"` + reListItem = re.compile(r'[ \t\r\n]*"((?:\\[\\trn"]|[^"\n\\])*)"') + + def __init__(self): + super(PoParser, self).__init__() + + def createEntity(self, ctx, m, current_comment, white_space): + start = cursor = m.start() + id_start = cursor + try: + msgctxt, cursor = self._parse_string_list(ctx, cursor, 'msgctxt') + m = self.reWhitespace.match(ctx.contents, cursor) + if m: + cursor = m.end() + except BadEntity: + # no msgctxt is OK + msgctxt = None + if id_start is None: + id_start = cursor + msgid, cursor = self._parse_string_list(ctx, cursor, 'msgid') + id_end = cursor + m = self.reWhitespace.match(ctx.contents, cursor) + if m: + cursor = m.end() + val_start = cursor + msgstr, cursor = self._parse_string_list(ctx, cursor, 'msgstr') + e = PoEntity( + ctx, + current_comment, + white_space, + (start, cursor), + (id_start, id_end), + (val_start, cursor) + ) + e.stringlist_key = (msgid, msgctxt) + e.stringlist_val = msgstr + return e + + def _parse_string_list(self, ctx, cursor, key): + if not ctx.contents.startswith(key, cursor): + raise BadEntity + cursor += len(key) + frags = [] + while True: + m = self.reListItem.match(ctx.contents, cursor) + if not m: + break + frags.append(m.group(1)) + cursor = m.end() + if not frags: + raise BadEntity + return eval_stringlist(frags), cursor diff --git a/third_party/python/compare-locales/compare_locales/parser/properties.py b/third_party/python/compare-locales/compare_locales/parser/properties.py new file mode 100644 index 0000000000..15b865a026 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/parser/properties.py @@ -0,0 +1,116 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import +from __future__ import unicode_literals +import re + +from .base import ( + Entity, OffsetComment, Whitespace, + Parser +) +from six import unichr + + +class PropertiesEntityMixin(object): + escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|' + '(?P<nl>\n[ \t]*)|(?P<single>.))', re.M) + known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'} + + @property + def val(self): + def unescape(m): + found = m.groupdict() + if found['uni']: + return unichr(int(found['uni'][1:], 16)) + if found['nl']: + return '' + return self.known_escapes.get(found['single'], found['single']) + + return self.escape.sub(unescape, self.raw_val) + + +class PropertiesEntity(PropertiesEntityMixin, Entity): + pass + + +class PropertiesParser(Parser): + + Comment = OffsetComment + + def __init__(self): + self.reKey = re.compile( + '(?P<key>[^#! \t\r\n][^=:\n]*?)[ \t]*[:=][ \t]*', re.M) + self.reComment = re.compile('(?:[#!][^\n]*\n)*(?:[#!][^\n]*)', re.M) + self._escapedEnd = re.compile(r'\\+$') + self._trailingWS = re.compile(r'[ \t\r\n]*(?:\n|\Z)', re.M) + Parser.__init__(self) + + def getNext(self, ctx, offset): + junk_offset = offset + # overwritten to parse values line by line + contents = ctx.contents + + m = self.reComment.match(contents, offset) + if m: + current_comment = self.Comment(ctx, m.span()) + if offset == 0 and 'License' in current_comment.val: + # Heuristic. A early comment with "License" is probably + # a license header, and should be standalone. + return current_comment + offset = m.end() + else: + current_comment = None + + m = self.reWhitespace.match(contents, offset) + if m: + white_space = Whitespace(ctx, m.span()) + offset = m.end() + if ( + current_comment is not None + and white_space.raw_val.count('\n') > 1 + ): + # standalone comment + return current_comment + if current_comment is None: + return white_space + else: + white_space = None + + m = self.reKey.match(contents, offset) + if m: + startline = offset = m.end() + while True: + endval = nextline = contents.find('\n', offset) + if nextline == -1: + endval = offset = len(contents) + break + # is newline escaped? + _e = self._escapedEnd.search(contents, offset, nextline) + offset = nextline + 1 + if _e is None: + break + # backslashes at end of line, if 2*n, not escaped + if len(_e.group()) % 2 == 0: + break + startline = offset + + # strip trailing whitespace + ws = self._trailingWS.search(contents, startline) + if ws: + endval = ws.start() + + entity = PropertiesEntity( + ctx, current_comment, white_space, + (m.start(), endval), # full span + m.span('key'), + (m.end(), endval)) # value span + return entity + + if current_comment is not None: + return current_comment + if white_space is not None: + return white_space + + return self.getJunk(ctx, junk_offset, self.reKey, self.reComment) diff --git a/third_party/python/compare-locales/compare_locales/paths/__init__.py b/third_party/python/compare-locales/compare_locales/paths/__init__.py new file mode 100644 index 0000000000..a3d3cbc43b --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/paths/__init__.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
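Before moving on to the path-handling modules below, a short sketch of how the properties parser just defined unescapes values: `val` resolves `\uNNNN` escapes and folds escaped line continuations, while `raw_val` keeps the source text. The sample strings are made up.

```python
from compare_locales.parser.properties import PropertiesParser

p = PropertiesParser()
p.readUnicode(
    'copy = \\u00a9 Mozilla\n'
    'multi = first \\\n'
    '    second\n'
)
print([(e.key, e.val) for e in p.walk(only_localizable=True)])
# [('copy', '© Mozilla'), ('multi', 'first second')]
```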
+ +from __future__ import absolute_import +from compare_locales import mozpath +from .files import ProjectFiles, REFERENCE_LOCALE +from .ini import ( + L10nConfigParser, SourceTreeConfigParser, + EnumerateApp, EnumerateSourceTreeApp, +) +from .matcher import Matcher +from .project import ProjectConfig +from .configparser import TOMLParser, ConfigNotFound + + +__all__ = [ + 'Matcher', + 'ProjectConfig', + 'L10nConfigParser', 'SourceTreeConfigParser', + 'EnumerateApp', 'EnumerateSourceTreeApp', + 'ProjectFiles', 'REFERENCE_LOCALE', + 'TOMLParser', 'ConfigNotFound', +] + + +class File(object): + + def __init__(self, fullpath, file, module=None, locale=None): + self.fullpath = fullpath + self.file = file + self.module = module + self.locale = locale + pass + + @property + def localpath(self): + if self.module: + return mozpath.join(self.locale, self.module, self.file) + return self.file + + def __hash__(self): + return hash(self.localpath) + + def __str__(self): + return self.fullpath + + def __eq__(self, other): + if not isinstance(other, File): + return False + return vars(self) == vars(other) + + def __ne__(self, other): + return not (self == other) diff --git a/third_party/python/compare-locales/compare_locales/paths/configparser.py b/third_party/python/compare-locales/compare_locales/paths/configparser.py new file mode 100644 index 0000000000..ce56df10b7 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/paths/configparser.py @@ -0,0 +1,140 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import errno +import logging +from compare_locales import mozpath +from .project import ProjectConfig +from .matcher import expand +import pytoml as toml +import six + + +class ConfigNotFound(EnvironmentError): + def __init__(self, path): + super(ConfigNotFound, self).__init__( + errno.ENOENT, + 'Configuration file not found', + path) + + +class ParseContext(object): + def __init__(self, path, env, ignore_missing_includes): + self.path = path + self.env = env + self.ignore_missing_includes = ignore_missing_includes + self.data = None + self.pc = ProjectConfig(path) + + +class TOMLParser(object): + def parse(self, path, env=None, ignore_missing_includes=False): + ctx = self.context( + path, env=env, ignore_missing_includes=ignore_missing_includes + ) + self.load(ctx) + self.processBasePath(ctx) + self.processEnv(ctx) + self.processPaths(ctx) + self.processFilters(ctx) + self.processIncludes(ctx) + self.processExcludes(ctx) + self.processLocales(ctx) + return self.asConfig(ctx) + + def context(self, path, env=None, ignore_missing_includes=False): + return ParseContext( + path, + env if env is not None else {}, + ignore_missing_includes, + ) + + def load(self, ctx): + try: + with open(ctx.path, 'rb') as fin: + ctx.data = toml.load(fin) + except (toml.TomlError, IOError): + raise ConfigNotFound(ctx.path) + + def processBasePath(self, ctx): + assert ctx.data is not None + ctx.pc.set_root(ctx.data.get('basepath', '.')) + + def processEnv(self, ctx): + assert ctx.data is not None + ctx.pc.add_environment(**ctx.data.get('env', {})) + # add parser environment, possibly overwriting file variables + ctx.pc.add_environment(**ctx.env) + + def processLocales(self, ctx): + assert ctx.data is not None + if 'locales' in ctx.data: + ctx.pc.set_locales(ctx.data['locales']) + + def processPaths(self, ctx): + 
assert ctx.data is not None + for data in ctx.data.get('paths', []): + paths = { + "l10n": data['l10n'] + } + if 'locales' in data: + paths['locales'] = data['locales'] + if 'reference' in data: + paths['reference'] = data['reference'] + if 'test' in data: + paths['test'] = data['test'] + ctx.pc.add_paths(paths) + + def processFilters(self, ctx): + assert ctx.data is not None + for data in ctx.data.get('filters', []): + paths = data['path'] + if isinstance(paths, six.string_types): + paths = [paths] + rule = { + "path": paths, + "action": data['action'] + } + if 'key' in data: + rule['key'] = data['key'] + ctx.pc.add_rules(rule) + + def processIncludes(self, ctx): + for child in self._processChild(ctx, 'includes'): + ctx.pc.add_child(child) + + def processExcludes(self, ctx): + for child in self._processChild(ctx, 'excludes'): + ctx.pc.exclude(child) + + def _processChild(self, ctx, field): + assert ctx.data is not None + if field not in ctx.data: + return + for child_config in ctx.data[field]: + # resolve child_config['path'] against our root and env + p = mozpath.normpath( + expand( + ctx.pc.root, + child_config['path'], + ctx.pc.environ + ) + ) + try: + child = self.parse( + p, env=ctx.env, + ignore_missing_includes=ctx.ignore_missing_includes + ) + except ConfigNotFound as e: + if not ctx.ignore_missing_includes: + raise + (logging + .getLogger('compare-locales.io') + .error('%s: %s', e.strerror, e.filename)) + continue + yield child + + def asConfig(self, ctx): + return ctx.pc diff --git a/third_party/python/compare-locales/compare_locales/paths/files.py b/third_party/python/compare-locales/compare_locales/paths/files.py new file mode 100644 index 0000000000..b7ec21b9f5 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/paths/files.py @@ -0,0 +1,223 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import os +from compare_locales import mozpath + + +REFERENCE_LOCALE = 'en-x-moz-reference' + + +class ConfigList(list): + def maybe_extend(self, other): + '''Add configs from other list if this list doesn't have this path yet. + ''' + for config in other: + if any(mine.path == config.path for mine in self): + continue + self.append(config) + + +class ProjectFiles(object): + '''Iterable object to get all files and tests for a locale and a + list of ProjectConfigs. + + If the given locale is None, iterate over reference files as + both reference and locale for a reference self-test. + ''' + def __init__(self, locale, projects, mergebase=None): + self.locale = locale + self.matchers = [] + self.exclude = None + self.mergebase = mergebase + configs = ConfigList() + excludes = ConfigList() + for project in projects: + # Only add this project if we're not in validation mode, + # and the given locale is enabled for the project. + if locale is not None and locale not in project.all_locales: + continue + configs.maybe_extend(project.configs) + excludes.maybe_extend(project.excludes) + # If an excluded config is explicitly included, drop if from the + # excludes. 
+ excludes = [ + exclude + for exclude in excludes + if not any(c.path == exclude.path for c in configs) + ] + if excludes: + self.exclude = ProjectFiles(locale, excludes) + for pc in configs: + if locale and pc.locales is not None and locale not in pc.locales: + continue + for paths in pc.paths: + if ( + locale and + 'locales' in paths and + locale not in paths['locales'] + ): + continue + m = { + 'l10n': paths['l10n'].with_env({ + "locale": locale or REFERENCE_LOCALE + }), + 'module': paths.get('module'), + } + if 'reference' in paths: + m['reference'] = paths['reference'] + if self.mergebase is not None: + m['merge'] = paths['l10n'].with_env({ + "locale": locale, + "l10n_base": self.mergebase + }) + m['test'] = set(paths.get('test', [])) + if 'locales' in paths: + m['locales'] = paths['locales'][:] + self.matchers.append(m) + self.matchers.reverse() # we always iterate last first + # Remove duplicate patterns, comparing each matcher + # against all other matchers. + # Avoid n^2 comparisons by only scanning the upper triangle + # of a n x n matrix of all possible combinations. + # Using enumerate and keeping track of indexes, as we can't + # modify the list while iterating over it. + drops = set() # duplicate matchers to remove + for i, m in enumerate(self.matchers[:-1]): + if i in drops: + continue # we're dropping this anyway, don't search again + for i_, m_ in enumerate(self.matchers[(i+1):]): + if (mozpath.realpath(m['l10n'].prefix) != + mozpath.realpath(m_['l10n'].prefix)): + # ok, not the same thing, continue + continue + # check that we're comparing the same thing + if 'reference' in m: + if (mozpath.realpath(m['reference'].prefix) != + mozpath.realpath(m_.get('reference').prefix)): + raise RuntimeError('Mismatch in reference for ' + + mozpath.realpath(m['l10n'].prefix)) + drops.add(i_ + i + 1) + m['test'] |= m_['test'] + drops = sorted(drops, reverse=True) + for i in drops: + del self.matchers[i] + + def __iter__(self): + # The iteration is pretty different when we iterate over + # a localization vs over the reference. We do that latter + # when running in validation mode. 
+ inner = self.iter_locale() if self.locale else self.iter_reference() + for t in inner: + yield t + + def iter_locale(self): + '''Iterate over locale files.''' + known = {} + for matchers in self.matchers: + matcher = matchers['l10n'] + for path in self._files(matcher): + if path not in known: + known[path] = {'test': matchers.get('test')} + if 'reference' in matchers: + known[path]['reference'] = matcher.sub( + matchers['reference'], path) + if 'merge' in matchers: + known[path]['merge'] = matcher.sub( + matchers['merge'], path) + if 'reference' not in matchers: + continue + matcher = matchers['reference'] + for path in self._files(matcher): + l10npath = matcher.sub(matchers['l10n'], path) + if l10npath not in known: + known[l10npath] = { + 'reference': path, + 'test': matchers.get('test') + } + if 'merge' in matchers: + known[l10npath]['merge'] = \ + matcher.sub(matchers['merge'], path) + for path, d in sorted(known.items()): + yield (path, d.get('reference'), d.get('merge'), d['test']) + + def iter_reference(self): + '''Iterate over reference files.''' + # unset self.exclude, as we don't want that for our reference files + exclude = self.exclude + self.exclude = None + known = {} + for matchers in self.matchers: + if 'reference' not in matchers: + continue + matcher = matchers['reference'] + for path in self._files(matcher): + refpath = matcher.sub(matchers['reference'], path) + if refpath not in known: + known[refpath] = { + 'reference': path, + 'test': matchers.get('test') + } + for path, d in sorted(known.items()): + yield (path, d.get('reference'), None, d['test']) + self.exclude = exclude + + def _files(self, matcher): + '''Base implementation of getting all files in a hierarchy + using the file system. + Subclasses might replace this method to support different IO + patterns. + ''' + base = matcher.prefix + if self._isfile(base): + if self.exclude and self.exclude.match(base) is not None: + return + if matcher.match(base) is not None: + yield base + return + for d, dirs, files in self._walk(base): + for f in files: + p = mozpath.join(d, f) + if self.exclude and self.exclude.match(p) is not None: + continue + if matcher.match(p) is not None: + yield p + + def _isfile(self, path): + return os.path.isfile(path) + + def _walk(self, base): + for d, dirs, files in os.walk(base): + yield d, dirs, files + + def match(self, path): + '''Return the tuple of l10n_path, reference, mergepath, tests + if the given path matches any config, otherwise None. + + This routine doesn't check that the files actually exist. 
+ ''' + if ( + self.locale is not None and + self.exclude and self.exclude.match(path) is not None + ): + return + for matchers in self.matchers: + matcher = matchers['l10n'] + if self.locale is not None and matcher.match(path) is not None: + ref = merge = None + if 'reference' in matchers: + ref = matcher.sub(matchers['reference'], path) + if 'merge' in matchers: + merge = matcher.sub(matchers['merge'], path) + return path, ref, merge, matchers.get('test') + if 'reference' not in matchers: + continue + matcher = matchers['reference'] + if matcher.match(path) is not None: + merge = None + l10n = matcher.sub(matchers['l10n'], path) + if 'merge' in matchers: + merge = matcher.sub(matchers['merge'], path) + return l10n, path, merge, matchers.get('test') diff --git a/third_party/python/compare-locales/compare_locales/paths/ini.py b/third_party/python/compare-locales/compare_locales/paths/ini.py new file mode 100644 index 0000000000..0e4b7d12bf --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/paths/ini.py @@ -0,0 +1,227 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from six.moves.configparser import ConfigParser, NoSectionError, NoOptionError +from collections import defaultdict +from compare_locales import util, mozpath +from .project import ProjectConfig + + +class L10nConfigParser(object): + '''Helper class to gather application information from ini files. + + This class is working on synchronous open to read files or web data. + Subclass this and overwrite loadConfigs and addChild if you need async. + ''' + def __init__(self, inipath, **kwargs): + """Constructor for L10nConfigParsers + + inipath -- l10n.ini path + Optional keyword arguments are fowarded to the inner ConfigParser as + defaults. + """ + self.inipath = mozpath.normpath(inipath) + # l10n.ini files can import other l10n.ini files, store the + # corresponding L10nConfigParsers + self.children = [] + # we really only care about the l10n directories described in l10n.ini + self.dirs = [] + # optional defaults to be passed to the inner ConfigParser (unused?) + self.defaults = kwargs + + def getDepth(self, cp): + '''Get the depth for the comparison from the parsed l10n.ini. + ''' + try: + depth = cp.get('general', 'depth') + except (NoSectionError, NoOptionError): + depth = '.' + return depth + + def getFilters(self): + '''Get the test functions from this ConfigParser and all children. + + Only works with synchronous loads, used by compare-locales, which + is local anyway. + ''' + filter_path = mozpath.join(mozpath.dirname(self.inipath), 'filter.py') + try: + local = {} + with open(filter_path) as f: + exec(compile(f.read(), filter_path, 'exec'), {}, local) + if 'test' in local and callable(local['test']): + filters = [local['test']] + else: + filters = [] + except BaseException: # we really want to handle EVERYTHING here + filters = [] + + for c in self.children: + filters += c.getFilters() + + return filters + + def loadConfigs(self): + """Entry point to load the l10n.ini file this Parser refers to. + + This implementation uses synchronous loads, subclasses might overload + this behaviour. If you do, make sure to pass a file-like object + to onLoadConfig. 
+ """ + cp = ConfigParser(self.defaults) + cp.read(self.inipath) + depth = self.getDepth(cp) + self.base = mozpath.join(mozpath.dirname(self.inipath), depth) + # create child loaders for any other l10n.ini files to be included + try: + for title, path in cp.items('includes'): + # skip default items + if title in self.defaults: + continue + # add child config parser + self.addChild(title, path, cp) + except NoSectionError: + pass + # try to load the "dirs" defined in the "compare" section + try: + self.dirs.extend(cp.get('compare', 'dirs').split()) + except (NoOptionError, NoSectionError): + pass + # try to set "all_path" and "all_url" + try: + self.all_path = mozpath.join(self.base, cp.get('general', 'all')) + except (NoOptionError, NoSectionError): + self.all_path = None + return cp + + def addChild(self, title, path, orig_cp): + """Create a child L10nConfigParser and load it. + + title -- indicates the module's name + path -- indicates the path to the module's l10n.ini file + orig_cp -- the configuration parser of this l10n.ini + """ + cp = L10nConfigParser(mozpath.join(self.base, path), **self.defaults) + cp.loadConfigs() + self.children.append(cp) + + def dirsIter(self): + """Iterate over all dirs and our base path for this l10n.ini""" + for dir in self.dirs: + yield dir, (self.base, dir) + + def directories(self): + """Iterate over all dirs and base paths for this l10n.ini as well + as the included ones. + """ + for t in self.dirsIter(): + yield t + for child in self.children: + for t in child.directories(): + yield t + + def allLocales(self): + """Return a list of all the locales of this project""" + with open(self.all_path) as f: + return util.parseLocales(f.read()) + + +class SourceTreeConfigParser(L10nConfigParser): + '''Subclassing L10nConfigParser to work with just the repos + checked out next to each other instead of intermingled like + we do for real builds. + ''' + + def __init__(self, inipath, base, redirects): + '''Add additional arguments basepath. + + basepath is used to resolve local paths via branchnames. + redirects is used in unified repository, mapping upstream + repos to local clones. + ''' + L10nConfigParser.__init__(self, inipath) + self.base = base + self.redirects = redirects + + def addChild(self, title, path, orig_cp): + # check if there's a section with details for this include + # we might have to check a different repo, or even VCS + # for example, projects like "mail" indicate in + # an "include_" section where to find the l10n.ini for "toolkit" + details = 'include_' + title + if orig_cp.has_section(details): + branch = orig_cp.get(details, 'mozilla') + branch = self.redirects.get(branch, branch) + inipath = orig_cp.get(details, 'l10n.ini') + path = mozpath.join(self.base, branch, inipath) + else: + path = mozpath.join(self.base, path) + cp = SourceTreeConfigParser(path, self.base, self.redirects, + **self.defaults) + cp.loadConfigs() + self.children.append(cp) + + +class EnumerateApp(object): + reference = 'en-US' + + def __init__(self, inipath, l10nbase): + self.setupConfigParser(inipath) + self.modules = defaultdict(dict) + self.l10nbase = mozpath.abspath(l10nbase) + self.filters = [] + self.addFilters(*self.config.getFilters()) + + def setupConfigParser(self, inipath): + self.config = L10nConfigParser(inipath) + self.config.loadConfigs() + + def addFilters(self, *args): + self.filters += args + + def asConfig(self): + # We've already normalized paths in the ini parsing. + # Set the path and root to None to just keep our paths as is. 
+ config = ProjectConfig(None) + config.set_root('.') # sets to None because path is None + config.add_environment(l10n_base=self.l10nbase) + self._config_for_ini(config, self.config) + filters = self.config.getFilters() + if filters: + config.set_filter_py(filters[0]) + config.set_locales(self.config.allLocales(), deep=True) + return config + + def _config_for_ini(self, projectconfig, aConfig): + for k, (basepath, module) in aConfig.dirsIter(): + paths = { + 'module': module, + 'reference': mozpath.normpath('%s/%s/locales/en-US/**' % + (basepath, module)), + 'l10n': mozpath.normpath('{l10n_base}/{locale}/%s/**' % + module) + } + if module == 'mobile/android/base': + paths['test'] = ['android-dtd'] + projectconfig.add_paths(paths) + for child in aConfig.children: + self._config_for_ini(projectconfig, child) + + +class EnumerateSourceTreeApp(EnumerateApp): + '''Subclass EnumerateApp to work on side-by-side checked out + repos, and to no pay attention to how the source would actually + be checked out for building. + ''' + + def __init__(self, inipath, basepath, l10nbase, redirects): + self.basepath = basepath + self.redirects = redirects + EnumerateApp.__init__(self, inipath, l10nbase) + + def setupConfigParser(self, inipath): + self.config = SourceTreeConfigParser(inipath, self.basepath, + self.redirects) + self.config.loadConfigs() diff --git a/third_party/python/compare-locales/compare_locales/paths/matcher.py b/third_party/python/compare-locales/compare_locales/paths/matcher.py new file mode 100644 index 0000000000..554d167686 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/paths/matcher.py @@ -0,0 +1,472 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import os +import re +import itertools +from compare_locales import mozpath +import six + + +# Android uses non-standard locale codes, these are the mappings +# back and forth +ANDROID_LEGACY_MAP = { + 'he': 'iw', + 'id': 'in', + 'yi': 'ji' +} +ANDROID_STANDARD_MAP = { + legacy: standard + for standard, legacy in six.iteritems(ANDROID_LEGACY_MAP) +} + + +class Matcher(object): + '''Path pattern matcher + Supports path matching similar to mozpath.match(), but does + not match trailing file paths without trailing wildcards. + Also gets a prefix, which is the path before the first wildcard, + which is good for filesystem iterations, and allows to replace + the own matches in a path on a different Matcher. compare-locales + uses that to transform l10n and en-US paths back and forth. + ''' + + def __init__(self, pattern_or_other, env={}, root=None, encoding=None): + '''Create regular expression similar to mozpath.match(). 
+ ''' + parser = PatternParser() + real_env = {k: parser.parse(v) for k, v in env.items()} + self._cached_re = None + if root is not None: + # make sure that our root is fully expanded and ends with / + root = mozpath.abspath(root) + '/' + # allow constructing Matchers from Matchers + if isinstance(pattern_or_other, Matcher): + other = pattern_or_other + self.pattern = Pattern(other.pattern) + self.env = other.env.copy() + self.env.update(real_env) + if root is not None: + self.pattern.root = root + self.encoding = other.encoding + return + self.env = real_env + pattern = pattern_or_other + self.pattern = parser.parse(pattern) + if root is not None: + self.pattern.root = root + self.encoding = encoding + + def with_env(self, environ): + return Matcher(self, environ) + + @property + def prefix(self): + subpattern = Pattern(self.pattern[:self.pattern.prefix_length]) + subpattern.root = self.pattern.root + prefix = subpattern.expand(self.env) + if self.encoding is not None: + prefix = prefix.encode(self.encoding) + return prefix + + def match(self, path): + '''Test the given path against this matcher and its environment. + + Return None if there's no match, and the dictionary of matched + variables in this matcher if there's a match. + ''' + self._cache_regex() + m = self._cached_re.match(path) + if m is None: + return None + d = m.groupdict() + if self.encoding is not None: + d = {key: value.decode(self.encoding) for key, value in d.items()} + if 'android_locale' in d and 'locale' not in d: + # map android_locale to locale code + locale = d['android_locale'] + # map legacy locale codes, he <-> iw, id <-> in, yi <-> ji + locale = re.sub( + r'(iw|in|ji)(?=\Z|-)', + lambda legacy: ANDROID_STANDARD_MAP[legacy.group(1)], + locale + ) + locale = re.sub(r'-r([A-Z]{2})', r'-\1', locale) + locale = locale.replace('b+', '').replace('+', '-') + d['locale'] = locale + return d + + def _cache_regex(self): + if self._cached_re is not None: + return + pattern = self.pattern.regex_pattern(self.env) + '$' + if self.encoding is not None: + pattern = pattern.encode(self.encoding) + self._cached_re = re.compile(pattern) + + def sub(self, other, path): + ''' + Replace the wildcard matches in this pattern into the + pattern of the other Match object. + ''' + m = self.match(path) + if m is None: + return None + env = {} + env.update( + (key, Literal(value if value is not None else '')) + for key, value in m.items() + ) + env.update(other.env) + path = other.pattern.expand(env) + if self.encoding is not None: + path = path.encode(self.encoding) + return path + + def concat(self, other): + '''Concat two Matcher objects. + + The intent is to create one Matcher with variable substitutions that + behaves as if you joined the resulting paths. + This doesn't do path separator logic, though, and it won't resolve + parent directories. 
+ ''' + if not isinstance(other, Matcher): + other_matcher = Matcher(other) + else: + other_matcher = other + other_pattern = other_matcher.pattern + if other_pattern.root is not None: + raise ValueError('Other matcher must not be rooted') + result = Matcher(self) + result.pattern += other_pattern + if self.pattern.prefix_length == len(self.pattern): + result.pattern.prefix_length += other_pattern.prefix_length + result.env.update(other_matcher.env) + return result + + def __str__(self): + return self.pattern.expand(self.env) + + def __repr__(self): + return '{}({!r}, env={!r}, root={!r})'.format( + type(self).__name__, self.pattern, self.env, self.pattern.root + ) + + def __ne__(self, other): + return not (self == other) + + def __eq__(self, other): + '''Equality for Matcher. + + The equality for Matchers is defined to have the same pattern, + and no conflicting environment. Additional environment settings + in self or other are OK. + ''' + if other.__class__ is not self.__class__: + return NotImplemented + if self.pattern != other.pattern: + return False + if self.env and other.env: + for k in self.env: + if k not in other.env: + continue + if self.env[k] != other.env[k]: + return False + if self.encoding != other.encoding: + return False + return True + + +def expand(root, path, env): + '''Expand a given path relative to the given root, + using the given env to resolve variables. + + This will break if the path contains wildcards. + ''' + matcher = Matcher(path, env=env, root=root) + return str(matcher) + + +class MissingEnvironment(Exception): + pass + + +class Node(object): + '''Abstract base class for all nodes in parsed patterns.''' + def regex_pattern(self, env): + '''Create a regular expression fragment for this Node.''' + raise NotImplementedError + + def expand(self, env): + '''Convert this node to a string with the given environment.''' + raise NotImplementedError + + +class Pattern(list, Node): + def __init__(self, iterable=[]): + list.__init__(self, iterable) + self.root = getattr(iterable, 'root', None) + self.prefix_length = getattr(iterable, 'prefix_length', None) + + def regex_pattern(self, env): + root = '' + if self.root is not None: + # make sure we're not hiding a full path + first_seg = self[0].expand(env) + if not os.path.isabs(first_seg): + root = re.escape(self.root) + return root + ''.join( + child.regex_pattern(env) for child in self + ) + + def expand(self, env, raise_missing=False): + root = '' + if self.root is not None: + # make sure we're not hiding a full path + first_seg = self[0].expand(env) + if not os.path.isabs(first_seg): + root = self.root + return root + ''.join(self._expand_children(env, raise_missing)) + + def _expand_children(self, env, raise_missing): + # Helper iterator to convert Exception to a stopped iterator + for child in self: + try: + yield child.expand(env, raise_missing=True) + except MissingEnvironment: + if raise_missing: + raise + return + + def __ne__(self, other): + return not (self == other) + + def __eq__(self, other): + if not super(Pattern, self).__eq__(other): + return False + if other.__class__ == list: + # good for tests and debugging + return True + return ( + self.root == other.root + and self.prefix_length == other.prefix_length + ) + + +class Literal(six.text_type, Node): + def regex_pattern(self, env): + return re.escape(self) + + def expand(self, env, raise_missing=False): + return self + + +class Variable(Node): + def __init__(self, name, repeat=False): + self.name = name + self.repeat = repeat + + def 
regex_pattern(self, env): + if self.repeat: + return '(?P={})'.format(self.name) + return '(?P<{}>{})'.format(self.name, self._pattern_from_env(env)) + + def _pattern_from_env(self, env): + if self.name in env: + # make sure we match the value in the environment + return env[self.name].regex_pattern(self._no_cycle(env)) + # match anything, including path segments + return '.+?' + + def expand(self, env, raise_missing=False): + '''Create a string for this Variable. + + This expansion happens recursively. We avoid recusion loops + by removing the current variable from the environment that's used + to expand child variable references. + ''' + if self.name not in env: + raise MissingEnvironment + return env[self.name].expand( + self._no_cycle(env), raise_missing=raise_missing + ) + + def _no_cycle(self, env): + '''Remove our variable name from the environment. + That way, we can't create cyclic references. + ''' + if self.name not in env: + return env + env = env.copy() + env.pop(self.name) + return env + + def __repr__(self): + return 'Variable(name="{}")'.format(self.name) + + def __ne__(self, other): + return not (self == other) + + def __eq__(self, other): + if other.__class__ is not self.__class__: + return False + return ( + self.name == other.name + and self.repeat == other.repeat + ) + + +class AndroidLocale(Variable): + '''Subclass for Android locale code mangling. + + Supports ab-rCD and b+ab+Scrip+DE. + Language and Language-Region tags get mapped to ab-rCD, more complex + Locale tags to b+. + ''' + def __init__(self, repeat=False): + self.name = 'android_locale' + self.repeat = repeat + + def _pattern_from_env(self, env): + android_locale = self._get_android_locale(env) + if android_locale is not None: + return re.escape(android_locale) + return '.+?' + + def expand(self, env, raise_missing=False): + '''Create a string for this Variable. + + This expansion happens recursively. We avoid recusion loops + by removing the current variable from the environment that's used + to expand child variable references. 
+ ''' + android_locale = self._get_android_locale(env) + if android_locale is None: + raise MissingEnvironment + return android_locale + + def _get_android_locale(self, env): + if 'locale' not in env: + return None + android = bcp47 = env['locale'].expand(self._no_cycle(env)) + # map legacy locale codes, he <-> iw, id <-> in, yi <-> ji + android = bcp47 = re.sub( + r'(he|id|yi)(?=\Z|-)', + lambda standard: ANDROID_LEGACY_MAP[standard.group(1)], + bcp47 + ) + if re.match(r'[a-z]{2,3}-[A-Z]{2}', bcp47): + android = '{}-r{}'.format(*bcp47.split('-')) + elif '-' in bcp47: + android = 'b+' + bcp47.replace('-', '+') + return android + + +class Star(Node): + def __init__(self, number): + self.number = number + + def regex_pattern(self, env): + return '(?P<s{}>[^/]*)'.format(self.number) + + def expand(self, env, raise_missing=False): + return env['s%d' % self.number] + + def __repr__(self): + return type(self).__name__ + + def __ne__(self, other): + return not (self == other) + + def __eq__(self, other): + if other.__class__ is not self.__class__: + return False + return self.number == other.number + + +class Starstar(Star): + def __init__(self, number, suffix): + self.number = number + self.suffix = suffix + + def regex_pattern(self, env): + return '(?P<s{}>.+{})?'.format(self.number, self.suffix) + + def __ne__(self, other): + return not (self == other) + + def __eq__(self, other): + if not super(Starstar, self).__eq__(other): + return False + return self.suffix == other.suffix + + +PATH_SPECIAL = re.compile( + r'(?P<starstar>(?<![^/}])\*\*(?P<suffix>/|$))' + r'|' + r'(?P<star>\*)' + r'|' + r'(?P<variable>{ *(?P<varname>[\w]+) *})' +) + + +class PatternParser(object): + def __init__(self): + # Not really initializing anything, just making room for our + # result and state members. + self.pattern = None + self._stargroup = self._cursor = None + self._known_vars = None + + def parse(self, pattern): + if isinstance(pattern, Pattern): + return pattern + if isinstance(pattern, Matcher): + return pattern.pattern + # Initializing result and state + self.pattern = Pattern() + self._stargroup = itertools.count(1) + self._known_vars = set() + self._cursor = 0 + for match in PATH_SPECIAL.finditer(pattern): + if match.start() > self._cursor: + self.pattern.append( + Literal(pattern[self._cursor:match.start()]) + ) + self.handle(match) + self.pattern.append(Literal(pattern[self._cursor:])) + if self.pattern.prefix_length is None: + self.pattern.prefix_length = len(self.pattern) + return self.pattern + + def handle(self, match): + if match.group('variable'): + self.variable(match) + else: + self.wildcard(match) + self._cursor = match.end() + + def variable(self, match): + varname = match.group('varname') + # Special case Android locale code matching. + # It's kinda sad, but true. 
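        # Illustrative examples (not part of the original source): with
        # {locale} = 'en-US' the android_locale form is 'en-rUS', with
        # 'sr-Cyrl' it is 'b+sr+Cyrl', and legacy codes map as
        # he -> iw, id -> in, yi -> ji; Matcher.match() above converts
        # such android_locale values back to BCP-47 locale codes.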
+ if varname == 'android_locale': + self.pattern.append(AndroidLocale(varname in self._known_vars)) + else: + self.pattern.append(Variable(varname, varname in self._known_vars)) + self._known_vars.add(varname) + + def wildcard(self, match): + # wildcard found, stop prefix + if self.pattern.prefix_length is None: + self.pattern.prefix_length = len(self.pattern) + wildcard = next(self._stargroup) + if match.group('star'): + # * + self.pattern.append(Star(wildcard)) + else: + # ** + self.pattern.append(Starstar(wildcard, match.group('suffix'))) diff --git a/third_party/python/compare-locales/compare_locales/paths/project.py b/third_party/python/compare-locales/compare_locales/paths/project.py new file mode 100644 index 0000000000..269b6fed9d --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/paths/project.py @@ -0,0 +1,265 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import re +from compare_locales import mozpath +from .matcher import Matcher +import six + + +class ExcludeError(ValueError): + pass + + +class ProjectConfig(object): + '''Abstraction of l10n project configuration data. + ''' + + def __init__(self, path): + self.filter_py = None # legacy filter code + # { + # 'l10n': pattern, + # 'reference': pattern, # optional + # 'locales': [], # optional + # 'test': [], # optional + # } + self.path = path + self.root = None + self.paths = [] + self.rules = [] + self.locales = None + # cache for all_locales, as that's not in `filter` + self._all_locales = None + self.environ = {} + self.children = [] + self.excludes = [] + self._cache = None + + def same(self, other): + '''Equality test, ignoring locales. + ''' + if other.__class__ is not self.__class__: + return False + if len(self.children) != len(other.children): + return False + for prop in ('path', 'root', 'paths', 'rules', 'environ'): + if getattr(self, prop) != getattr(other, prop): + return False + for this_child, other_child in zip(self.children, other.children): + if not this_child.same(other_child): + return False + return True + + def set_root(self, basepath): + if self.path is None: + self.root = None + return + self.root = mozpath.abspath( + mozpath.join(mozpath.dirname(self.path), basepath) + ) + + def add_environment(self, **kwargs): + self.environ.update(kwargs) + + def add_paths(self, *paths): + '''Add path dictionaries to this config. + The dictionaries must have a `l10n` key. For monolingual files, + `reference` is also required. + An optional key `test` is allowed to enable additional tests for this + path pattern. + ''' + self._all_locales = None # clear cache + for d in paths: + rv = { + 'l10n': Matcher(d['l10n'], env=self.environ, root=self.root), + 'module': d.get('module') + } + if 'reference' in d: + rv['reference'] = Matcher( + d['reference'], env=self.environ, root=self.root + ) + if 'test' in d: + rv['test'] = d['test'] + if 'locales' in d: + rv['locales'] = d['locales'][:] + self.paths.append(rv) + + def set_filter_py(self, filter_function): + '''Set legacy filter.py code. + Assert that no rules are set. + Also, normalize output already here. 
+ ''' + assert not self.rules + + def filter_(module, path, entity=None): + try: + rv = filter_function(module, path, entity=entity) + except BaseException: # we really want to handle EVERYTHING here + return 'error' + rv = { + True: 'error', + False: 'ignore', + 'report': 'warning' + }.get(rv, rv) + assert rv in ('error', 'ignore', 'warning', None) + return rv + self.filter_py = filter_ + + def add_rules(self, *rules): + '''Add rules to filter on. + Assert that there's no legacy filter.py code hooked up. + ''' + assert self.filter_py is None + for rule in rules: + self.rules.extend(self._compile_rule(rule)) + + def add_child(self, child): + self._all_locales = None # clear cache + if child.excludes: + raise ExcludeError( + 'Included configs cannot declare their own excludes.' + ) + self.children.append(child) + + def exclude(self, child): + for config in child.configs: + if config.excludes: + raise ExcludeError( + 'Excluded configs cannot declare their own excludes.' + ) + self.excludes.append(child) + + def set_locales(self, locales, deep=False): + self._all_locales = None # clear cache + self.locales = locales + if not deep: + return + for child in self.children: + child.set_locales(locales, deep=deep) + + @property + def configs(self): + 'Recursively get all configs in this project and its children' + yield self + for child in self.children: + for config in child.configs: + yield config + + @property + def all_locales(self): + 'Recursively get all locales in this project and its paths' + if self._all_locales is None: + all_locales = set() + for config in self.configs: + if config.locales is not None: + all_locales.update(config.locales) + for paths in config.paths: + if 'locales' in paths: + all_locales.update(paths['locales']) + self._all_locales = sorted(all_locales) + return self._all_locales + + def filter(self, l10n_file, entity=None): + '''Filter a localization file or entities within, according to + this configuration file.''' + if l10n_file.locale not in self.all_locales: + return 'ignore' + if self.filter_py is not None: + return self.filter_py(l10n_file.module, l10n_file.file, + entity=entity) + rv = self._filter(l10n_file, entity=entity) + if rv is None: + return 'ignore' + return rv + + class FilterCache(object): + def __init__(self, locale): + self.locale = locale + self.rules = [] + self.l10n_paths = [] + + def cache(self, locale): + if self._cache and self._cache.locale == locale: + return self._cache + self._cache = self.FilterCache(locale) + for paths in self.paths: + if 'locales' in paths and locale not in paths['locales']: + continue + self._cache.l10n_paths.append(paths['l10n'].with_env({ + "locale": locale + })) + for rule in self.rules: + cached_rule = rule.copy() + cached_rule['path'] = rule['path'].with_env({ + "locale": locale + }) + self._cache.rules.append(cached_rule) + return self._cache + + def _filter(self, l10n_file, entity=None): + if any( + exclude.filter(l10n_file) == 'error' + for exclude in self.excludes + ): + return + actions = set( + child._filter(l10n_file, entity=entity) + for child in self.children) + if 'error' in actions: + # return early if we know we'll error + return 'error' + + cached = self.cache(l10n_file.locale) + if any(p.match(l10n_file.fullpath) for p in cached.l10n_paths): + action = 'error' + for rule in reversed(cached.rules): + if not rule['path'].match(l10n_file.fullpath): + continue + if ('key' in rule) ^ (entity is not None): + # key/file mismatch, not a matching rule + continue + if 'key' in rule and not 
rule['key'].match(entity): + continue + action = rule['action'] + break + actions.add(action) + if 'error' in actions: + return 'error' + if 'warning' in actions: + return 'warning' + if 'ignore' in actions: + return 'ignore' + + def _compile_rule(self, rule): + assert 'path' in rule + if isinstance(rule['path'], list): + for path in rule['path']: + _rule = rule.copy() + _rule['path'] = Matcher(path, env=self.environ, root=self.root) + for __rule in self._compile_rule(_rule): + yield __rule + return + if isinstance(rule['path'], six.string_types): + rule['path'] = Matcher( + rule['path'], env=self.environ, root=self.root + ) + if 'key' not in rule: + yield rule + return + if not isinstance(rule['key'], six.string_types): + for key in rule['key']: + _rule = rule.copy() + _rule['key'] = key + for __rule in self._compile_rule(_rule): + yield __rule + return + rule = rule.copy() + key = rule['key'] + if key.startswith('re:'): + key = key[3:] + else: + key = re.escape(key) + '$' + rule['key'] = re.compile(key) + yield rule diff --git a/third_party/python/compare-locales/compare_locales/plurals.py b/third_party/python/compare-locales/compare_locales/plurals.py new file mode 100644 index 0000000000..d316b6cf43 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/plurals.py @@ -0,0 +1,218 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Mapping of locales to CLDR plural categories as implemented by PluralForm.jsm' + +CATEGORIES_BY_INDEX = ( + # 0 (Chinese) + ('other',), + # 1 (English) + ('one', 'other'), + # 2 (French) + ('one', 'other'), + # 3 (Latvian) + ('zero', 'one', 'other'), + # 4 (Scottish Gaelic) + ('one', 'two', 'few', 'other'), + # 5 (Romanian) + ('one', 'few', 'other'), + # 6 (Lithuanian) + # CLDR: one, few, many (fractions), other + ('one', 'other', 'few'), + # 7 (Russian) + # CLDR: one, few, many, other (fractions) + ('one', 'few', 'many'), + # 8 (Slovak) + # CLDR: one, few, many (fractions), other + ('one', 'few', 'other'), + # 9 (Polish) + # CLDR: one, few, many, other (fractions) + ('one', 'few', 'many'), + # 10 (Slovenian) + ('one', 'two', 'few', 'other'), + # 11 (Irish Gaelic) + ('one', 'two', 'few', 'many', 'other'), + # 12 (Arabic) + # CLDR: zero, one, two, few, many, other + ('one', 'two', 'few', 'many', 'other', 'zero'), + # 13 (Maltese) + ('one', 'few', 'many', 'other'), + # 14 (Unused) + # CLDR: one, other + ('one', 'two', 'other'), + # 15 (Icelandic, Macedonian) + ('one', 'other'), + # 16 (Breton) + ('one', 'two', 'few', 'many', 'other'), + # 17 (Shuar) + # CLDR: (missing) + ('zero', 'other'), + # 18 (Welsh), + ('zero', 'one', 'two', 'few', 'many', 'other'), + # 19 (Bosnian, Croatian, Serbian) + ('one', 'few', 'other'), +) + +CATEGORIES_EXCEPTIONS = { +} + +CATEGORIES_BY_LOCALE = { + 'ace': 0, + 'ach': 1, + 'af': 1, + 'ak': 2, + 'an': 1, + 'ar': 12, + 'arn': 1, + 'as': 1, + 'ast': 1, + 'az': 1, + 'be': 7, + 'bg': 1, + 'bn': 2, + 'bo': 0, + 'br': 16, + 'brx': 1, + 'bs': 19, + 'ca': 1, + 'cak': 1, + 'ckb': 1, + 'crh': 1, + 'cs': 8, + 'csb': 9, + 'cv': 1, + 'cy': 18, + 'da': 1, + 'de': 1, + 'dsb': 10, + 'el': 1, + 'en': 1, + 'eo': 1, + 'es': 1, + 'et': 1, + 'eu': 1, + 'fa': 2, + 'ff': 1, + 'fi': 1, + 'fr': 2, + 'frp': 2, + 'fur': 1, + 'fy': 1, + 'ga': 11, + 'gd': 4, + 'gl': 1, + 'gn': 1, + 'gu': 2, + 'he': 1, + 'hi': 2, + 'hr': 19, + 'hsb': 10, + 'hto': 1, + 'hu': 1, + 'hy': 1, + 'hye': 1, + 'ia': 
1, + 'id': 0, + 'ilo': 0, + 'is': 15, + 'it': 1, + 'ja': 0, + 'jiv': 17, + 'ka': 1, + 'kab': 1, + 'kk': 1, + 'km': 0, + 'kn': 1, + 'ko': 0, + 'ks': 1, + 'ku': 1, + 'lb': 1, + 'lg': 1, + 'lij': 1, + 'lo': 0, + 'lt': 6, + 'ltg': 3, + 'lv': 3, + 'lus': 0, + 'mai': 1, + 'meh': 0, + 'mix': 0, + 'mk': 15, + 'ml': 1, + 'mn': 1, + 'mr': 1, + 'ms': 0, + 'my': 0, + 'nb': 1, + 'ne': 1, + 'nl': 1, + 'nn': 1, + 'nr': 1, + 'nso': 2, + 'ny': 1, + 'oc': 2, + 'or': 1, + 'pa': 2, + 'pai': 0, + 'pl': 9, + 'pt': 1, + 'quy': 1, + 'qvi': 1, + 'rm': 1, + 'ro': 5, + 'ru': 7, + 'rw': 1, + 'sah': 0, + 'sat': 1, + 'sc': 1, + 'scn': 1, + 'si': 1, + 'sk': 8, + 'sl': 10, + 'son': 1, + 'sq': 1, + 'sr': 19, + 'ss': 1, + 'st': 1, + 'sv': 1, + 'sw': 1, + 'szl': 9, + 'ta': 1, + 'ta': 1, + 'te': 1, + 'th': 0, + 'tl': 1, + 'tn': 1, + 'tr': 1, + 'trs': 1, + 'ts': 1, + 'tsz': 1, + 'uk': 7, + 'ur': 1, + 'uz': 1, + 've': 1, + 'vi': 0, + 'wo': 0, + 'xh': 1, + 'zam': 1, + 'zh-CN': 0, + 'zh-TW': 0, + 'zu': 2, +} + + +def get_plural(locale): + plural_form = get_plural_rule(locale) + if plural_form is None: + return None + return CATEGORIES_BY_INDEX[plural_form] + + +def get_plural_rule(locale): + if locale is None: + return None + if locale in CATEGORIES_BY_LOCALE: + return CATEGORIES_BY_LOCALE[locale] + locale = locale.split('-', 1)[0] + return CATEGORIES_BY_LOCALE.get(locale) diff --git a/third_party/python/compare-locales/compare_locales/serializer.py b/third_party/python/compare-locales/compare_locales/serializer.py new file mode 100644 index 0000000000..60e5a93766 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/serializer.py @@ -0,0 +1,137 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Serialize string changes. + +The serialization logic is based on the cross-channel merge algorithm. +It's taking the file structure for the first file, and localizable entries +from the last. +Input data is the parsed reference as a list of parser.walk(), +the existing localized file, also a list of parser.walk(), and a dictionary +of newly added keys and raw values. +To remove a string from a localization, pass `None` as value for a key. + +The marshalling between raw values and entities is done via Entity.unwrap +and Entity.wrap. + +To avoid adding English reference strings into the generated file, the +actual entities in the reference are replaced with Placeholders, which +are removed in a final pass over the result of merge_resources. After that, +we also prune whitespace once more.` +''' + +from codecs import encode +import six + +from compare_locales.merge import merge_resources, serialize_legacy_resource +from compare_locales.parser import getParser +from compare_locales.parser.base import ( + Entity, + PlaceholderEntity, + Junk, + Whitespace, +) + + +class SerializationNotSupportedError(ValueError): + pass + + +def serialize(filename, reference, old_l10n, new_data): + '''Returns a byte string of the serialized content to use. + + Input are a filename to create the right parser, a reference and + an existing localization, both as the result of parser.walk(). + Finally, new_data is a dictionary of key to raw values to serialize. + + Raises a SerializationNotSupportedError if we don't support the file + format. 
+ ''' + try: + parser = getParser(filename) + except UserWarning: + raise SerializationNotSupportedError( + 'Unsupported file format ({}).'.format(filename)) + # create template, whitespace and all + placeholders = [ + placeholder(entry) + for entry in reference + if not isinstance(entry, Junk) + ] + ref_mapping = { + entry.key: entry + for entry in reference + if isinstance(entry, Entity) + } + # strip obsolete strings + old_l10n = sanitize_old(ref_mapping.keys(), old_l10n, new_data) + # create new Entities + # .val can just be "", merge_channels doesn't need that + new_l10n = [] + for key, new_raw_val in six.iteritems(new_data): + if new_raw_val is None or key not in ref_mapping: + continue + ref_ent = ref_mapping[key] + new_l10n.append(ref_ent.wrap(new_raw_val)) + + merged = merge_resources( + parser, + [placeholders, old_l10n, new_l10n], + keep_newest=False + ) + pruned = prune_placeholders(merged) + return encode(serialize_legacy_resource(pruned), parser.encoding) + + +def sanitize_old(known_keys, old_l10n, new_data): + """Strip Junk and replace obsolete messages with placeholders. + If new_data has `None` as a value, strip the existing translation. + Use placeholders generously, so that we can rely on `prune_placeholders` + to find their associated comments and remove them, too. + """ + + def should_placeholder(entry): + # If entry is an Entity, check if it's obsolete + # or marked to be removed. + if not isinstance(entry, Entity): + return False + if entry.key not in known_keys: + return True + return entry.key in new_data and new_data[entry.key] is None + + return [ + placeholder(entry) + if should_placeholder(entry) + else entry + for entry in old_l10n + if not isinstance(entry, Junk) + ] + + +def placeholder(entry): + if isinstance(entry, Entity): + return PlaceholderEntity(entry.key) + return entry + + +def prune_placeholders(entries): + pruned = [ + entry for entry in entries + if not isinstance(entry, PlaceholderEntity) + ] + + def prune_whitespace(acc, entity): + if len(acc) and isinstance(entity, Whitespace): + prev_entity = acc[-1] + + if isinstance(prev_entity, Whitespace): + # Prefer the longer whitespace. + if len(entity.all) > len(prev_entity.all): + acc[-1] = entity + return acc + + acc.append(entity) + return acc + + return six.moves.reduce(prune_whitespace, pruned, []) diff --git a/third_party/python/compare-locales/compare_locales/tests/__init__.py b/third_party/python/compare-locales/compare_locales/tests/__init__.py new file mode 100644 index 0000000000..8e4df17961 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/__init__.py @@ -0,0 +1,82 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Mixins for parser tests. +''' + +from __future__ import absolute_import + +from pkg_resources import resource_string +import re +import unittest + +from compare_locales import parser +from compare_locales.checks import getChecker +import six +from six.moves import zip_longest + + +class ParserTestMixin(): + '''Utility methods used by the parser tests. + ''' + filename = None + + def setUp(self): + '''Create a parser for this test. 
+ ''' + self.parser = parser.getParser(self.filename) + + def tearDown(self): + 'tear down this test' + del self.parser + + def resource(self, name): + testcontent = resource_string(__name__, 'data/' + name) + # fake universal line endings + testcontent = re.sub(b'\r\n?', lambda m: b'\n', testcontent) + return testcontent + + def _test(self, unicode_content, refs): + '''Helper to test the parser. + Compares the result of parsing content with the given list + of reference keys and values. + ''' + self.parser.readUnicode(unicode_content) + entities = list(self.parser.walk()) + for entity, ref in zip_longest(entities, refs): + self.assertTrue(entity, + 'excess reference entity ' + six.text_type(ref)) + self.assertTrue(ref, + 'excess parsed entity ' + six.text_type(entity)) + if isinstance(entity, parser.Entity): + self.assertEqual(entity.key, ref[0]) + self.assertEqual(entity.val, ref[1]) + if len(ref) == 3: + self.assertIn(ref[2], entity.pre_comment.val) + else: + self.assertIsInstance(entity, ref[0]) + self.assertIn(ref[1], entity.all) + + +class BaseHelper(unittest.TestCase): + file = None + refContent = None + + def setUp(self): + p = parser.getParser(self.file.file) + p.readContents(self.refContent) + self.refList = p.parse() + + def _test(self, content, refWarnOrErrors): + p = parser.getParser(self.file.file) + p.readContents(content) + l10n = [e for e in p] + assert len(l10n) == 1 + l10n = l10n[0] + checker = getChecker(self.file) + if checker.needs_reference: + checker.set_reference(self.refList) + ref = self.refList[l10n.key] + found = tuple(checker.check(ref, l10n)) + self.assertEqual(found, refWarnOrErrors) diff --git a/third_party/python/compare-locales/compare_locales/tests/android/__init__.py b/third_party/python/compare-locales/compare_locales/tests/android/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/android/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/android/test_checks.py b/third_party/python/compare-locales/compare_locales/tests/android/test_checks.py new file mode 100644 index 0000000000..382a7f8bdb --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/android/test_checks.py @@ -0,0 +1,344 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
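
The `BaseHelper` pattern above (parse `refContent`, parse the localized content the same way, ask `getChecker` for the matching checker, then compare the tuples it yields) is exactly what the android check tests below exercise. A minimal standalone sketch of that flow outside `unittest`; `run_checks` and `ANDROID_XML` are illustrative names, and the sketch assumes the localization only uses keys that exist in the reference:

```python
from compare_locales.checks import getChecker
from compare_locales.parser import getParser
from compare_locales.paths import File


def run_checks(path, ref_content, l10n_content):
    """Yield (severity, position, message, category) tuples,
    mirroring what BaseHelper does for the tests in this file."""
    f = File(path, path)
    p = getParser(f.file)
    p.readContents(ref_content)
    reference = p.parse()          # keyed collection of reference entities
    p.readContents(l10n_content)
    checker = getChecker(f)
    if checker.needs_reference:
        checker.set_reference(reference)
    for l10n_entity in p:
        ref_entity = reference[l10n_entity.key]   # assumes matching keys
        for result in checker.check(ref_entity, l10n_entity):
            yield result


# e.g. the quoting rule exercised by QuotesTest below:
ANDROID_XML = b'''<?xml version="1.0" encoding="utf-8"?>
<resources>
    <string name="foo">%s</string>
</resources>
'''
for tup in run_checks('values/strings.xml',
                      ANDROID_XML % b'plain',
                      ANDROID_XML % b'""'):
    print(tup)  # expect the "Double straight quotes not allowed" error
```
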
+ +from __future__ import absolute_import +from __future__ import unicode_literals + +from compare_locales.tests import BaseHelper +from compare_locales.paths import File + + +ANDROID_WRAPPER = b'''<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="foo">%s</string> +</resources> +''' + + +class SimpleStringsTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = ANDROID_WRAPPER % b'plain' + + def test_simple_string(self): + self._test( + ANDROID_WRAPPER % b'foo', + tuple() + ) + + def test_empty_string(self): + self._test( + ANDROID_WRAPPER % b'', + tuple() + ) + + def test_single_cdata(self): + self._test( + ANDROID_WRAPPER % b'<![CDATA[text]]>', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'<![CDATA[\n text\n ]]>', + tuple() + ) + + def test_mix_cdata(self): + self._test( + ANDROID_WRAPPER % b'<![CDATA[text]]> with <![CDATA[cdatas]]>', + ( + ( + "error", + 0, + "Only plain text allowed, " + "or one CDATA surrounded by whitespace", + "android" + ), + ) + ) + + def test_element_fails(self): + self._test( + ANDROID_WRAPPER % b'one<br/>two', + ( + ( + "error", + 0, + "Only plain text allowed, " + "or one CDATA surrounded by whitespace", + "android" + ), + ) + ) + + def test_bad_encoding(self): + self._test( + ANDROID_WRAPPER % 'touché'.encode('latin-1'), + ( + ( + "warning", + 24, + "\ufffd in: foo", + "encodings" + ), + ) + ) + + +class QuotesTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = ANDROID_WRAPPER % b'plain' + + def test_straightquotes(self): + self._test( + ANDROID_WRAPPER % b'""', + ( + ( + "error", + 0, + "Double straight quotes not allowed", + "android" + ), + ) + ) + self._test( + ANDROID_WRAPPER % b'"some"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'some\\"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'some"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'some', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'some""', + ( + ( + "error", + 4, + "Double straight quotes not allowed", + "android" + ), + ) + ) + + def test_apostrophes(self): + self._test( + ANDROID_WRAPPER % b'''"some'apos"''', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'''some\\'apos''', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'''some'apos''', + ( + ( + "error", + 4, + "Apostrophe must be escaped", + "android" + ), + ) + ) + + +class TranslatableTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = (ANDROID_WRAPPER % b'plain').replace( + b'name="foo"', + b'translatable="false" name="foo"') + + def test_translatable(self): + self._test( + ANDROID_WRAPPER % b'"some"', + ( + ( + "error", + 0, + "strings must be translatable", + "android" + ), + ) + ) + + +class AtStringTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = (ANDROID_WRAPPER % b'@string/foo') + + def test_translatable(self): + self._test( + ANDROID_WRAPPER % b'"some"', + ( + ( + "warning", + 0, + "strings must be translatable", + "android" + ), + ) + ) + + +class PrintfSTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = ANDROID_WRAPPER % b'%s' + + def test_match(self): + self._test( + ANDROID_WRAPPER % b'"%s"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'"%1$s"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'"$s %1$s"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'"$1$s %1$s"', + tuple() + ) + + def test_mismatch(self): + self._test( + ANDROID_WRAPPER % b'"%d"', + ( + ( + "error", + 0, + 
"Mismatching formatter", + "android" + ), + ) + ) + self._test( + ANDROID_WRAPPER % b'"%S"', + ( + ( + "error", + 0, + "Mismatching formatter", + "android" + ), + ) + ) + + def test_off_position(self): + self._test( + ANDROID_WRAPPER % b'%2$s', + ( + ( + "error", + 0, + "Formatter %2$s not found in reference", + "android" + ), + ) + ) + + +class PrintfCapSTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = ANDROID_WRAPPER % b'%S' + + def test_match(self): + self._test( + ANDROID_WRAPPER % b'"%S"', + tuple() + ) + + def test_mismatch(self): + self._test( + ANDROID_WRAPPER % b'"%s"', + ( + ( + "error", + 0, + "Mismatching formatter", + "android" + ), + ) + ) + self._test( + ANDROID_WRAPPER % b'"%d"', + ( + ( + "error", + 0, + "Mismatching formatter", + "android" + ), + ) + ) + + +class PrintfDTest(BaseHelper): + file = File('values/strings.xml', 'values/strings.xml') + refContent = ANDROID_WRAPPER % b'%d' + + def test_match(self): + self._test( + ANDROID_WRAPPER % b'"%d"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'"%1$d"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'"$d %1$d"', + tuple() + ) + self._test( + ANDROID_WRAPPER % b'"$1$d %1$d"', + tuple() + ) + + def test_mismatch(self): + self._test( + ANDROID_WRAPPER % b'"%s"', + ( + ( + "error", + 0, + "Mismatching formatter", + "android" + ), + ) + ) + self._test( + ANDROID_WRAPPER % b'"%S"', + ( + ( + "error", + 0, + "Mismatching formatter", + "android" + ), + ) + ) + + def test_off_position(self): + self._test( + ANDROID_WRAPPER % b'%2$d', + ( + ( + "error", + 0, + "Formatter %2$d not found in reference", + "android" + ), + ) + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/android/test_merge.py b/third_party/python/compare-locales/compare_locales/tests/android/test_merge.py new file mode 100644 index 0000000000..32e13a7439 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/android/test_merge.py @@ -0,0 +1,82 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import unittest +from compare_locales.merge import merge_channels + + +class TestMerge(unittest.TestCase): + name = "strings.xml" + + def test_no_changes(self): + channels = (b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- bar --> + <string name="foo">value</string> +</resources> +''', b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- bar --> + <string name="foo">value</string> +</resources> +''') + self.assertEqual( + merge_channels(self.name, channels), b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- bar --> + <string name="foo">value</string> +</resources> +''') + + def test_a_and_b(self): + channels = (b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- Foo --> + <string name="foo">value</string> +</resources> +''', b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- Bar --> + <string name="bar">other value</string> +</resources> +''') + self.assertEqual( + merge_channels(self.name, channels), b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- Bar --> + <string name="bar">other value</string> + <!-- Foo --> + <string name="foo">value</string> +</resources> +''') + + def test_namespaces(self): + channels = ( + b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources xmlns:ns1="urn:ns1"> + <string ns1:one="test">string</string> +</resources> +''', + b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources xmlns:ns2="urn:ns2"> + <string ns2:two="test">string</string> +</resources> +''' + ) + self.assertEqual( + merge_channels(self.name, channels), b'''\ +<?xml version="1.0" encoding="utf-8"?> +<resources xmlns:ns2="urn:ns2" xmlns:ns1="urn:ns1"> + <string ns2:two="test">string</string> + <string ns1:one="test">string</string> +</resources> +''') diff --git a/third_party/python/compare-locales/compare_locales/tests/android/test_parser.py b/third_party/python/compare-locales/compare_locales/tests/android/test_parser.py new file mode 100644 index 0000000000..f5949a1b86 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/android/test_parser.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
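
The parser tests below compare the stream produced by `parser.walk()` against expected node types (`DocumentWrapper`, `Whitespace`, entities, `Comment`, `Junk`). A quick illustrative walk over a tiny, made-up resource shows the same kind of stream:

```python
from __future__ import print_function
from compare_locales.parser import getParser, Entity, Junk

SAMPLE = '''\
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- greeting -->
    <string name="hello">Hello</string>
</resources>
'''

p = getParser('strings.xml')  # picks the android XML parser
p.readUnicode(SAMPLE)
for node in p.walk():
    if isinstance(node, Entity):
        # a comment directly above an entity ends up in node.pre_comment
        print('entity', node.key, '=', node.val)
    elif isinstance(node, Junk):
        print('junk:', node.all)
    else:
        # DocumentWrapper, Comment and Whitespace keep the file structure
        print(type(node).__name__, repr(node.all))
```
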
+ +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest + +from compare_locales.tests import ParserTestMixin +from compare_locales.parser import ( + Comment, + Junk, + Whitespace, +) +from compare_locales.parser.android import DocumentWrapper + + +class TestAndroidParser(ParserTestMixin, unittest.TestCase): + maxDiff = None + filename = 'strings.xml' + + def test_simple_string(self): + source = '''\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- bar --> + <string name="foo">value</string> + <!-- bar --> + <!-- foo --> + <string name="bar">multi-line comment</string> + + <!-- standalone --> + + <string name="baz">so lonely</string> +</resources> +''' + self._test( + source, + ( + (DocumentWrapper, '<?xml'), + (DocumentWrapper, '>'), + (Whitespace, '\n '), + ('foo', 'value', 'bar'), + (Whitespace, '\n'), + ('bar', 'multi-line comment', 'bar\nfoo'), + (Whitespace, '\n '), + (Comment, 'standalone'), + (Whitespace, '\n '), + ('baz', 'so lonely'), + (Whitespace, '\n'), + (DocumentWrapper, '</resources>') + ) + ) + + def test_bad_doc(self): + source = '''\ +<?xml version="1.0" ?> +<not-a-resource/> +''' + self._test( + source, + ( + (Junk, '<not-a-resource/>'), + ) + ) + + def test_bad_elements(self): + source = '''\ +<?xml version="1.0" ?> +<resources> + <string name="first">value</string> + <non-string name="bad">value</non-string> + <string name="mid">value</string> + <string nomine="dom">value</string> + <string name="last">value</string> +</resources> +''' + self._test( + source, + ( + (DocumentWrapper, '<?xml'), + (DocumentWrapper, '>'), + (Whitespace, '\n '), + ('first', 'value'), + (Whitespace, '\n '), + (Junk, '<non-string name="bad">'), + (Whitespace, '\n '), + ('mid', 'value'), + (Whitespace, '\n '), + (Junk, '<string nomine="dom">'), + (Whitespace, '\n '), + ('last', 'value'), + (Whitespace, '\n'), + (DocumentWrapper, '</resources>') + ) + ) + + def test_xml_parse_error(self): + source = 'no xml' + self._test( + source, + ( + (Junk, 'no xml'), + ) + ) + + def test_empty_strings(self): + source = '''\ +<?xml version="1.0" ?> +<resources> + <string name="one"></string> + <string name="two"/> +</resources> +''' + self._test( + source, + ( + (DocumentWrapper, '<?xml'), + (DocumentWrapper, '>'), + (Whitespace, '\n '), + ('one', ''), + (Whitespace, '\n '), + ('two', ''), + (Whitespace, '\n'), + (DocumentWrapper, '</resources>') + ) + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/data/bug121341.properties b/third_party/python/compare-locales/compare_locales/tests/data/bug121341.properties new file mode 100644 index 0000000000..b45fc9698c --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/data/bug121341.properties @@ -0,0 +1,68 @@ +# simple check +1=abc +# test whitespace trimming in key and value + 2 = xy +# test parsing of escaped values +3 = \u1234\t\r\n\uAB\ +\u1\n +# test multiline properties +4 = this is \ +multiline property +5 = this is \ + another multiline property +# property with DOS EOL
+6 = test\u0036
+# test multiline property with with DOS EOL +7 = yet another multi\
+ line propery
+# trimming should not trim escaped whitespaces +8 = \ttest5\u0020 +# another variant of #8 +9 = \ test6\t +# test UTF-8 encoded property/value +10aሴb = c췯d +# next property should test unicode escaping at the boundary of parsing buffer +# buffer size is expected to be 4096 so add comments to get to this offset +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ 
+################################################################################ +################################################################################ +############################################################################### +11 = \uABCD diff --git a/third_party/python/compare-locales/compare_locales/tests/data/test.properties b/third_party/python/compare-locales/compare_locales/tests/data/test.properties new file mode 100644 index 0000000000..19cae97028 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/data/test.properties @@ -0,0 +1,14 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +1=1 + 2=2 +3 =3 + 4 =4 +5=5 +6= 6 +7=7 +8= 8 +# this is a comment +9=this is the first part of a continued line \ + and here is the 2nd part diff --git a/third_party/python/compare-locales/compare_locales/tests/data/triple-license.dtd b/third_party/python/compare-locales/compare_locales/tests/data/triple-license.dtd new file mode 100644 index 0000000000..4a28b17a6f --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/data/triple-license.dtd @@ -0,0 +1,38 @@ +<!-- ***** BEGIN LICENSE BLOCK ***** +#if 0 + - Version: MPL 1.1/GPL 2.0/LGPL 2.1 + - + - The contents of this file are subject to the Mozilla Public License Version + - 1.1 (the "License"); you may not use this file except in compliance with + - the License. You may obtain a copy of the License at + - http://www.mozilla.org/MPL/ + - + - Software distributed under the License is distributed on an "AS IS" basis, + - WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + - for the specific language governing rights and limitations under the + - License. + - + - The Original Code is mozilla.org Code. + - + - The Initial Developer of the Original Code is dummy. + - Portions created by the Initial Developer are Copyright (C) 2005 + - the Initial Developer. All Rights Reserved. + - + - Contributor(s): + - + - Alternatively, the contents of this file may be used under the terms of + - either the GNU General Public License Version 2 or later (the "GPL"), or + - the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + - in which case the provisions of the GPL or the LGPL are applicable instead + - of those above. If you wish to allow use of your version of this file only + - under the terms of either the GPL or the LGPL, and not to allow others to + - use your version of this file under the terms of the MPL, indicate your + - decision by deleting the provisions above and replace them with the notice + - and other provisions required by the LGPL or the GPL. If you do not delete + - the provisions above, a recipient may use your version of this file under + - the terms of any one of the MPL, the GPL or the LGPL. 
+ - +#endif + - ***** END LICENSE BLOCK ***** --> + +<!ENTITY foo "value"> diff --git a/third_party/python/compare-locales/compare_locales/tests/dtd/__init__.py b/third_party/python/compare-locales/compare_locales/tests/dtd/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/dtd/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/dtd/test_checks.py b/third_party/python/compare-locales/compare_locales/tests/dtd/test_checks.py new file mode 100644 index 0000000000..5967c016d9 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/dtd/test_checks.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest + +from compare_locales.checks import getChecker +from compare_locales.parser import getParser, Parser, DTDEntity +from compare_locales.paths import File +from compare_locales.tests import BaseHelper +import six +from six.moves import range + + +class TestDTDs(BaseHelper): + file = File('foo.dtd', 'foo.dtd') + refContent = b'''<!ENTITY foo "This is 'good'"> +<!ENTITY width "10ch"> +<!ENTITY style "width: 20ch; height: 280px;"> +<!ENTITY minStyle "min-height: 50em;"> +<!ENTITY ftd "0"> +<!ENTITY formatPercent "This is 100% correct"> +<!ENTITY some.key "K"> +''' + + def testWarning(self): + self._test(b'''<!ENTITY foo "This is ¬ good"> +''', + (('warning', (0, 0), 'Referencing unknown entity `not`', + 'xmlparse'),)) + # make sure we only handle translated entity references + self._test('''<!ENTITY foo "This is &ƞǿŧ; good"> +'''.encode('utf-8'), + (('warning', (0, 0), 'Referencing unknown entity `ƞǿŧ`', + 'xmlparse'),)) + + def testErrorFirstLine(self): + self._test(b'''<!ENTITY foo "This is </bad> stuff"> +''', + (('error', (1, 10), 'mismatched tag', 'xmlparse'),)) + + def testErrorSecondLine(self): + self._test(b'''<!ENTITY foo "This is + </bad> +stuff"> +''', + (('error', (2, 4), 'mismatched tag', 'xmlparse'),)) + + def testKeyErrorSingleAmpersand(self): + self._test(b'''<!ENTITY some.key "&"> +''', + (('error', (1, 1), 'not well-formed (invalid token)', + 'xmlparse'),)) + + def testXMLEntity(self): + self._test(b'''<!ENTITY foo "This is "good""> +''', + tuple()) + + def testPercentEntity(self): + self._test(b'''<!ENTITY formatPercent "Another 100%"> +''', + tuple()) + self._test(b'''<!ENTITY formatPercent "Bad 100% should fail"> +''', + (('error', (0, 32), 'not well-formed (invalid token)', + 'xmlparse'),)) + + def testNoNumber(self): + self._test(b'''<!ENTITY ftd "foo">''', + (('warning', 0, 'reference is a number', 'number'),)) + + def testNoLength(self): + self._test(b'''<!ENTITY width "15miles">''', + (('error', 0, 'reference is a CSS length', 'css'),)) + + def testNoStyle(self): + self._test(b'''<!ENTITY style "15ch">''', + (('error', 0, 'reference is a CSS spec', 'css'),)) + self._test(b'''<!ENTITY style "junk">''', + (('error', 0, 'reference is a CSS spec', 'css'),)) + + def testStyleWarnings(self): + self._test(b'''<!ENTITY style "width:15ch">''', + (('warning', 0, 'height only in reference', 'css'),)) + self._test(b'''<!ENTITY style "width:15em;height:200px;">''', + (('warning', 0, "units for width don't match (em != ch)", + 'css'),)) + + def testNoWarning(self): 
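+        # each value below is a valid number / CSS value for its reference,
+        # so no reports are expected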
+ self._test(b'''<!ENTITY width "12em">''', tuple()) + self._test(b'''<!ENTITY style "width:12ch;height:200px;">''', tuple()) + self._test(b'''<!ENTITY ftd "0">''', tuple()) + + def test_bad_encoding(self): + self._test( + '<!ENTITY foo "touché">'.encode('latin-1'), + ( + ( + "warning", + 19, + "\ufffd in: foo", + "encodings" + ), + ) + ) + + +class TestEntitiesInDTDs(BaseHelper): + file = File('foo.dtd', 'foo.dtd') + refContent = b'''<!ENTITY short "This is &brandShortName;"> +<!ENTITY shorter "This is &brandShorterName;"> +<!ENTITY ent.start "Using &brandShorterName; start to"> +<!ENTITY ent.end " end"> +''' + + def testOK(self): + self._test(b'''<!ENTITY ent.start "Mit &brandShorterName;">''', + tuple()) + + def testMismatch(self): + self._test(b'''<!ENTITY ent.start "Mit &brandShortName;">''', + (('warning', (0, 0), + 'Entity brandShortName referenced, ' + 'but brandShorterName used in context', + 'xmlparse'),)) + + def testAcross(self): + self._test(b'''<!ENTITY ent.end "Mit &brandShorterName;">''', + tuple()) + + def testAcrossWithMismatch(self): + '''If we could tell that ent.start and ent.end are one string, + we should warn. Sadly, we can't, so this goes without warning.''' + self._test(b'''<!ENTITY ent.end "Mit &brandShortName;">''', + tuple()) + + def testUnknownWithRef(self): + self._test(b'''<!ENTITY ent.start "Mit &foopy;">''', + (('warning', + (0, 0), + 'Referencing unknown entity `foopy` ' + '(brandShorterName used in context, ' + 'brandShortName known)', + 'xmlparse'),)) + + def testUnknown(self): + self._test(b'''<!ENTITY ent.end "Mit &foopy;">''', + (('warning', + (0, 0), + 'Referencing unknown entity `foopy`' + ' (brandShortName, brandShorterName known)', + 'xmlparse'),)) + + +class TestAndroid(unittest.TestCase): + """Test Android checker + + Make sure we're hitting our extra rules only if + we're passing in a DTD file in the embedding/android module. + """ + apos_msg = "Apostrophes in Android DTDs need escaping with \\' or " + \ + "\\u0027, or use \u2019, or put string in quotes." + quot_msg = "Quotes in Android DTDs need escaping with \\\" or " + \ + "\\u0022, or put string in apostrophes." + + def getNext(self, v): + ctx = Parser.Context(v) + return DTDEntity( + ctx, None, None, (0, len(v)), (), (0, len(v))) + + def getDTDEntity(self, v): + if isinstance(v, six.binary_type): + v = v.decode('utf-8') + v = v.replace('"', '"') + ctx = Parser.Context('<!ENTITY foo "%s">' % v) + return DTDEntity( + ctx, None, None, (0, len(v) + 16), (9, 12), (14, len(v) + 14)) + + def test_android_dtd(self): + """Testing the actual android checks. The logic is involved, + so this is a lot of nitty gritty detail tests. 
+ """ + f = File("embedding/android/strings.dtd", "strings.dtd", + "embedding/android") + checker = getChecker(f, extra_tests=['android-dtd']) + # good string + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # dtd warning + l10n = self.getDTDEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('warning', (0, 0), + 'Referencing unknown entity `ref`', 'xmlparse'),)) + # no report on stray ampersand or quote, if not completely quoted + for i in range(3): + # make sure we're catching unescaped apostrophes, + # try 0..5 backticks + l10n = self.getDTDEntity("\\"*(2*i) + "'") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 2*i, self.apos_msg, 'android'),)) + l10n = self.getDTDEntity("\\"*(2*i + 1) + "'") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # make sure we don't report if apos string is quoted + l10n = self.getDTDEntity('"' + "\\"*(2*i) + "'\"") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" + % (l10n.val, str(tpl))) + l10n = self.getDTDEntity('"' + "\\"*(2*i+1) + "'\"") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" + % (l10n.val, str(tpl))) + # make sure we're catching unescaped quotes, try 0..5 backticks + l10n = self.getDTDEntity("\\"*(2*i) + "\"") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 2*i, self.quot_msg, 'android'),)) + l10n = self.getDTDEntity("\\"*(2*i + 1) + "'") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # make sure we don't report if quote string is single quoted + l10n = self.getDTDEntity("'" + "\\"*(2*i) + "\"'") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" % + (l10n.val, str(tpl))) + l10n = self.getDTDEntity('"' + "\\"*(2*i+1) + "'\"") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" % + (l10n.val, str(tpl))) + # check for mixed quotes and ampersands + l10n = self.getDTDEntity("'\"") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 0, self.apos_msg, 'android'), + ('error', 1, self.quot_msg, 'android'))) + l10n = self.getDTDEntity("''\"'") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 1, self.apos_msg, 'android'),)) + l10n = self.getDTDEntity('"\'""') + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 2, self.quot_msg, 'android'),)) + + # broken unicode escape + l10n = self.getDTDEntity(b"Some broken \u098 unicode") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 12, 'truncated \\uXXXX escape', + 'android'),)) + # broken unicode escape, try to set the error off + l10n = self.getDTDEntity("\u9690"*14+"\\u006"+" "+"\\u0064") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 14, 'truncated \\uXXXX escape', + 'android'),)) + + def test_android_prop(self): + f = File("embedding/android/strings.properties", "strings.properties", + "embedding/android") + checker = getChecker(f, extra_tests=['android-dtd']) + # good plain string + ref = self.getNext("plain string") + l10n = self.getNext("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # no dtd warning + ref = self.getNext("plain string") + l10n = self.getNext("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # no report on stray ampersand 
+ ref = self.getNext("plain string") + l10n = self.getNext("plain localized string with apos: '") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # report on bad printf + ref = self.getNext("string with %s") + l10n = self.getNext("string with %S") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 0, 'argument 1 `S` should be `s`', + 'printf'),)) + + def test_non_android_dtd(self): + f = File("browser/strings.dtd", "strings.dtd", "browser") + checker = getChecker(f) + # good string + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # dtd warning + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('warning', (0, 0), + 'Referencing unknown entity `ref`', 'xmlparse'),)) + # no report on stray ampersand + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string with apos: '") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + + def test_entities_across_dtd(self): + f = File("browser/strings.dtd", "strings.dtd", "browser") + p = getParser(f.file) + p.readContents(b'<!ENTITY other "some &good.ref;">') + ref = p.parse() + checker = getChecker(f) + checker.set_reference(ref) + # good string + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # dtd warning + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('warning', (0, 0), + 'Referencing unknown entity `ref` (good.ref known)', + 'xmlparse'),)) + # no report on stray ampersand + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string with &good.ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/python/compare-locales/compare_locales/tests/dtd/test_merge.py b/third_party/python/compare-locales/compare_locales/tests/dtd/test_merge.py new file mode 100644 index 0000000000..e1db766e94 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/dtd/test_merge.py @@ -0,0 +1,133 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +from compare_locales.merge import merge_channels + + +class TestMergeDTD(unittest.TestCase): + name = "foo.dtd" + maxDiff = None + + def test_no_changes(self): + channels = (b""" +<!ENTITY foo "Foo 1"> +""", b""" +<!ENTITY foo "Foo 2"> +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +<!ENTITY foo "Foo 1"> +""") + + def test_trailing_whitespace(self): + channels = (b""" +<!ENTITY foo "Foo 1"> +""", b""" +<!ENTITY foo "Foo 2"> \n""") + self.assertEqual( + merge_channels(self.name, channels), b""" +<!ENTITY foo "Foo 1"> \n""") + + def test_browser_dtd(self): + channels = (b"""\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!-- LOCALIZATION NOTE : FILE This file contains the browser main menu ... 
--> +<!-- LOCALIZATION NOTE : FILE Do not translate commandkeys --> + +<!-- LOCALIZATION NOTE (mainWindow.titlemodifier) : DONT_TRANSLATE --> +<!ENTITY mainWindow.titlemodifier "&brandFullName;"> +<!-- LOCALIZATION NOTE (mainWindow.separator): DONT_TRANSLATE --> +<!ENTITY mainWindow.separator " - "> +<!-- LOCALIZATION NOTE (mainWindow.privatebrowsing2): This will be appended ... + inside the ... --> +<!ENTITY mainWindow.privatebrowsing2 "(Private Browsing)"> +""", b"""\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!-- LOCALIZATION NOTE : FILE This file contains the browser main menu ... --> +<!-- LOCALIZATION NOTE : FILE Do not translate commandkeys --> + +<!-- LOCALIZATION NOTE (mainWindow.title): DONT_TRANSLATE --> +<!ENTITY mainWindow.title "&brandFullName;"> +<!-- LOCALIZATION NOTE (mainWindow.titlemodifier) : DONT_TRANSLATE --> +<!ENTITY mainWindow.titlemodifier "&brandFullName;"> +<!-- LOCALIZATION NOTE (mainWindow.privatebrowsing): This will be appended ... + inside the ... --> +<!ENTITY mainWindow.privatebrowsing "(Private Browsing)"> +""") + + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!-- LOCALIZATION NOTE : FILE This file contains the browser main menu ... --> +<!-- LOCALIZATION NOTE : FILE Do not translate commandkeys --> + +<!-- LOCALIZATION NOTE (mainWindow.title): DONT_TRANSLATE --> +<!ENTITY mainWindow.title "&brandFullName;"> + +<!-- LOCALIZATION NOTE (mainWindow.titlemodifier) : DONT_TRANSLATE --> +<!ENTITY mainWindow.titlemodifier "&brandFullName;"> +<!-- LOCALIZATION NOTE (mainWindow.privatebrowsing): This will be appended ... + inside the ... --> +<!ENTITY mainWindow.privatebrowsing "(Private Browsing)"> +<!-- LOCALIZATION NOTE (mainWindow.separator): DONT_TRANSLATE --> +<!ENTITY mainWindow.separator " - "> +<!-- LOCALIZATION NOTE (mainWindow.privatebrowsing2): This will be appended ... + inside the ... --> +<!ENTITY mainWindow.privatebrowsing2 "(Private Browsing)"> +""") + + def test_aboutServiceWorkers_dtd(self): + channels = (b"""\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY title "About Service Workers"> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY maintitle "Registered Service Workers"> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY warning_not_enabled "Service Workers are not enabled."> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY warning_no_serviceworkers "No Service Workers registered."> +""", b"""\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. 
--> +<!ENTITY title "About Service Workers"> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY maintitle "Registered Service Workers"> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY warning_not_enabled "Service Workers are not enabled."> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY warning_no_serviceworkers "No Service Workers registered."> +""") + + self.assertEqual( + merge_channels(self.name, channels), b"""\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY title "About Service Workers"> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY maintitle "Registered Service Workers"> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY warning_not_enabled "Service Workers are not enabled."> +<!-- LOCALIZATION NOTE the term "Service Workers" should not be translated. --> +<!ENTITY warning_no_serviceworkers "No Service Workers registered."> +""") diff --git a/third_party/python/compare-locales/compare_locales/tests/dtd/test_parser.py b/third_party/python/compare-locales/compare_locales/tests/dtd/test_parser.py new file mode 100644 index 0000000000..679bd21f84 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/dtd/test_parser.py @@ -0,0 +1,271 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Tests for the DTD parser. 
+''' + +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest +import re + +from compare_locales import parser +from compare_locales.parser import ( + Comment, + Junk, + Whitespace, +) +from compare_locales.tests import ParserTestMixin + + +class TestDTD(ParserTestMixin, unittest.TestCase): + '''Tests for the DTD Parser.''' + filename = 'foo.dtd' + + def test_one_entity(self): + self._test('''<!ENTITY foo.label "stuff">''', + (('foo.label', 'stuff'),)) + self.assertListEqual( + [e.localized for e in self.parser], + [True] + ) + + quoteContent = '''<!ENTITY good.one "one"> +<!ENTITY bad.one "bad " quote"> +<!ENTITY good.two "two"> +<!ENTITY bad.two "bad "quoted" word"> +<!ENTITY good.three "three"> +<!ENTITY good.four "good ' quote"> +<!ENTITY good.five "good 'quoted' word"> +''' + quoteRef = ( + ('good.one', 'one'), + (Whitespace, '\n'), + (Junk, '<!ENTITY bad.one "bad " quote">\n'), + ('good.two', 'two'), + (Whitespace, '\n'), + (Junk, '<!ENTITY bad.two "bad "quoted" word">\n'), + ('good.three', 'three'), + (Whitespace, '\n'), + ('good.four', 'good \' quote'), + (Whitespace, '\n'), + ('good.five', 'good \'quoted\' word'), + (Whitespace, '\n'),) + + def test_quotes(self): + self._test(self.quoteContent, self.quoteRef) + + def test_apos(self): + qr = re.compile('[\'"]', re.M) + + def quot2apos(s): + return qr.sub(lambda m: m.group(0) == '"' and "'" or '"', s) + + self._test(quot2apos(self.quoteContent), + ((ref[0], quot2apos(ref[1])) for ref in self.quoteRef)) + + def test_parsed_ref(self): + self._test('''<!ENTITY % fooDTD SYSTEM "chrome://brand.dtd"> + %fooDTD; +''', + (('fooDTD', '"chrome://brand.dtd"'),)) + self._test('''<!ENTITY % fooDTD SYSTEM "chrome://brand.dtd"> + %fooDTD; +''', + (('fooDTD', '"chrome://brand.dtd"'),)) + + def test_trailing_comment(self): + self._test('''<!ENTITY first "string"> +<!ENTITY second "string"> +<!-- +<!ENTITY commented "out"> +--> +''', + ( + ('first', 'string'), + (Whitespace, '\n'), + ('second', 'string'), + (Whitespace, '\n'), + (Comment, 'out'), + (Whitespace, '\n'))) + + def test_license_header(self): + p = parser.getParser('foo.dtd') + p.readContents(self.resource('triple-license.dtd')) + entities = list(p.walk()) + self.assertIsInstance(entities[0], parser.Comment) + self.assertIn('MPL', entities[0].all) + e = entities[2] + self.assertIsInstance(e, parser.Entity) + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assertEqual(len(entities), 4) + p.readContents(b'''\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this file, + - You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!ENTITY foo "value"> +''') + entities = list(p.walk()) + self.assertIsInstance(entities[0], parser.Comment) + self.assertIn('MPL', entities[0].all) + e = entities[2] + self.assertIsInstance(e, parser.Entity) + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assertEqual(len(entities), 4) + # Test again without empty line after licence header, and with BOM. + p.readContents(b'''\xEF\xBB\xBF\ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this file, + - You can obtain one at http://mozilla.org/MPL/2.0/. 
--> +<!ENTITY foo "value"> +''') + entities = list(p.walk()) + self.assertIsInstance(entities[0], parser.Comment) + self.assertIn('MPL', entities[0].all) + e = entities[2] + self.assertIsInstance(e, parser.Entity) + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assertEqual(len(entities), 4) + + def testBOM(self): + self._test(u'\ufeff<!ENTITY foo.label "stuff">', + (('foo.label', 'stuff'),)) + + def test_trailing_whitespace(self): + self._test('<!ENTITY foo.label "stuff">\n \n', + (('foo.label', 'stuff'), (Whitespace, '\n \n'))) + + def test_unicode_comment(self): + self._test(b'<!-- \xe5\x8f\x96 -->'.decode('utf-8'), + ((Comment, u'\u53d6'),)) + + def test_empty_file(self): + self._test('', tuple()) + self._test('\n', ((Whitespace, '\n'),)) + self._test('\n\n', ((Whitespace, '\n\n'),)) + self._test(' \n\n', ((Whitespace, ' \n\n'),)) + + def test_positions(self): + self.parser.readContents(b'''\ +<!ENTITY one "value"> +<!ENTITY two "other +escaped value"> +''') + one, two = list(self.parser) + self.assertEqual(one.position(), (1, 1)) + self.assertEqual(one.value_position(), (1, 16)) + self.assertEqual(one.position(-1), (1, 23)) + self.assertEqual(two.position(), (2, 1)) + self.assertEqual(two.value_position(), (2, 16)) + self.assertEqual(two.value_position(-1), (3, 14)) + self.assertEqual(two.value_position(10), (3, 5)) + + def test_word_count(self): + self.parser.readContents(b'''\ +<!ENTITY a "one"> +<!ENTITY b "one<br>two"> +<!ENTITY c "one<span>word</span>"> +<!ENTITY d "one <a href='foo'>two</a> three"> +''') + a, b, c, d = list(self.parser) + self.assertEqual(a.count_words(), 1) + self.assertEqual(b.count_words(), 2) + self.assertEqual(c.count_words(), 1) + self.assertEqual(d.count_words(), 3) + + def test_html_entities(self): + self.parser.readContents(b'''\ +<!ENTITY named "&"> +<!ENTITY numcode "&"> +<!ENTITY shorthexcode "&"> +<!ENTITY longhexcode "&"> +<!ENTITY unknown "&unknownEntity;"> +''') + entities = iter(self.parser) + + entity = next(entities) + self.assertEqual(entity.raw_val, '&') + self.assertEqual(entity.val, '&') + + entity = next(entities) + self.assertEqual(entity.raw_val, '&') + self.assertEqual(entity.val, '&') + + entity = next(entities) + self.assertEqual(entity.raw_val, '&') + self.assertEqual(entity.val, '&') + + entity = next(entities) + self.assertEqual(entity.raw_val, '&') + self.assertEqual(entity.val, '&') + + entity = next(entities) + self.assertEqual(entity.raw_val, '&unknownEntity;') + self.assertEqual(entity.val, '&unknownEntity;') + + def test_comment_val(self): + self.parser.readContents(b'''\ +<!-- comment +spanning lines --> <!-- +--> +<!-- last line --> +''') + entities = self.parser.walk() + + entity = next(entities) + self.assertIsInstance(entity, parser.Comment) + self.assertEqual(entity.val, ' comment\nspanning lines ') + entity = next(entities) + self.assertIsInstance(entity, parser.Whitespace) + + entity = next(entities) + self.assertIsInstance(entity, parser.Comment) + self.assertEqual(entity.val, '\n') + entity = next(entities) + self.assertIsInstance(entity, parser.Whitespace) + + entity = next(entities) + self.assertIsInstance(entity, parser.Comment) + self.assertEqual(entity.val, ' last line ') + entity = next(entities) + self.assertIsInstance(entity, parser.Whitespace) + + def test_pre_comment(self): + self.parser.readContents(b'''\ +<!-- comment --> +<!ENTITY one "string"> + +<!-- standalone --> + +<!-- glued --><!ENTITY second "string"> +''') + entities = self.parser.walk() + + entity = next(entities) + 
self.assertIsInstance(entity.pre_comment, parser.Comment) + self.assertEqual(entity.pre_comment.val, ' comment ') + entity = next(entities) + self.assertIsInstance(entity, parser.Whitespace) + + entity = next(entities) + self.assertIsInstance(entity, parser.Comment) + self.assertEqual(entity.val, ' standalone ') + entity = next(entities) + self.assertIsInstance(entity, parser.Whitespace) + + entity = next(entities) + self.assertIsInstance(entity.pre_comment, parser.Comment) + self.assertEqual(entity.pre_comment.val, ' glued ') + entity = next(entities) + self.assertIsInstance(entity, parser.Whitespace) + with self.assertRaises(StopIteration): + next(entities) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/python/compare-locales/compare_locales/tests/fluent/__init__.py b/third_party/python/compare-locales/compare_locales/tests/fluent/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/fluent/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/fluent/test_checks.py b/third_party/python/compare-locales/compare_locales/tests/fluent/test_checks.py new file mode 100644 index 0000000000..5a906d2a8d --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/fluent/test_checks.py @@ -0,0 +1,581 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import textwrap +import unittest + +from compare_locales.tests import BaseHelper +from compare_locales.paths import File + + +def dedent_ftl(text): + return textwrap.dedent(text.rstrip() + "\n").encode("utf-8") + + +REFERENCE = b'''\ +simple = value +term_ref = some { -term } + .attr = is simple +msg-attr-ref = some {button.label} +mixed-attr = value + .and = attribute +only-attr = + .one = exists +-term = value need + .attrs = can differ +''' + + +class TestFluent(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = REFERENCE + + def test_simple(self): + self._test(b'''simple = localized''', + tuple()) + + +class TestMessage(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = REFERENCE + + def test_excess_attribute(self): + self._test( + dedent_ftl('''\ + simple = value with + .obsolete = attribute + '''), + ( + ( + 'error', 24, + 'Obsolete attribute: obsolete', 'fluent' + ), + ) + ) + + def test_duplicate_attribute(self): + self._test( + dedent_ftl('''\ + only-attr = + .one = attribute + .one = again + .one = three times + '''), + ( + ( + 'warning', 16, + 'Attribute "one" is duplicated', 'fluent' + ), + ( + 'warning', 37, + 'Attribute "one" is duplicated', 'fluent' + ), + ( + 'warning', 54, + 'Attribute "one" is duplicated', 'fluent' + ), + ) + ) + + def test_only_attributes(self): + self._test( + dedent_ftl('''\ + only-attr = obsolete value + '''), + ( + ( + 'error', 0, + 'Missing attribute: one', 'fluent' + ), + ( + 'error', 12, + 'Obsolete value', 'fluent' + ), + ) + ) + + def test_missing_value(self): + self._test( + dedent_ftl('''\ + mixed-attr = + .and = attribute exists + '''), + ( + ( + 'error', 0, + 'Missing value', 'fluent' + ), + ) + ) + + def test_bad_encoding(self): + self._test( + 'simple = touché'.encode('latin-1'), + ( + ( + "warning", + 14, + "\ufffd in: simple", + "encodings" + ), + ) + ) + + 
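
The integer in each expected tuple throughout these fluent tests is an offset into the localized content (a character offset for these ASCII fixtures). For instance, the three warnings in `test_duplicate_attribute` above point at the start of each repeated `.one`; a quick illustrative check of the first two offsets:

```python
# Recompute the offsets expected in test_duplicate_attribute above;
# this only illustrates what the position numbers mean.
fixture = (
    'only-attr =\n'
    '    .one = attribute\n'
    '    .one = again\n'
    '    .one = three times\n'
)
first = fixture.index('.one')              # 16
second = fixture.index('.one', first + 1)  # 37
assert (first, second) == (16, 37)
```
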
+class TestTerm(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = REFERENCE + + def test_mismatching_attribute(self): + self._test( + dedent_ftl('''\ + -term = value with + .different = attribute + '''), + tuple() + ) + + def test_duplicate_attribute(self): + self._test( + dedent_ftl('''\ + -term = need value + .one = attribute + .one = again + .one = three times + '''), + ( + ( + 'warning', 23, + 'Attribute "one" is duplicated', 'fluent' + ), + ( + 'warning', 44, + 'Attribute "one" is duplicated', 'fluent' + ), + ( + 'warning', 61, + 'Attribute "one" is duplicated', 'fluent' + ), + ) + ) + + +class TestMessageReference(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = REFERENCE + + def test_msg_attr(self): + self._test( + b'''msg-attr-ref = Nice {button.label}''', + tuple() + ) + self._test( + b'''msg-attr-ref = not at all''', + ( + ( + 'warning', 0, + 'Missing message reference: button.label', 'fluent' + ), + ) + ) + self._test( + b'''msg-attr-ref = {button} is not a label''', + ( + ( + 'warning', 0, + 'Missing message reference: button.label', 'fluent' + ), + ( + 'warning', 16, + 'Obsolete message reference: button', 'fluent' + ), + ) + ) + self._test( + b'''msg-attr-ref = {button.tooltip} is not a label''', + ( + ( + 'warning', 0, + 'Missing message reference: button.label', 'fluent' + ), + ( + 'warning', 16, + 'Obsolete message reference: button.tooltip', 'fluent' + ), + ) + ) + + +class TestTermReference(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = REFERENCE + + def test_good_term_ref(self): + self._test( + dedent_ftl('''\ + term_ref = localized to {-term} + .attr = is plain + '''), + tuple() + ) + + def test_missing_term_ref(self): + self._test( + dedent_ftl('''\ + term_ref = localized + .attr = should not refer to {-term} + '''), + ( + ( + 'warning', 0, + 'Missing term reference: -term', 'fluent' + ), + ( + 'warning', 54, + 'Obsolete term reference: -term', 'fluent' + ), + ) + ) + + def test_l10n_only_term_ref(self): + self._test( + b'''simple = localized with { -term }''', + ( + ( + u'warning', 26, + u'Obsolete term reference: -term', u'fluent' + ), + ) + ) + + def test_term_attr(self): + self._test( + dedent_ftl('''\ + term_ref = Depends on { -term.prop -> + *[some] Term prop, doesn't reference the term value, though. 
+ } + .attr = still simple + '''), + ( + ( + u'warning', 0, + u'Missing term reference: -term', u'fluent' + ), + ) + ) + + +class SelectExpressionTest(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = b'''\ +msg = { $val -> + *[other] Show something + } +-term = Foopy +''' + + def test_no_select(self): + self._test( + b'''msg = Something''', + tuple() + ) + + def test_good(self): + self._test( + dedent_ftl('''\ + msg = { $val -> + *[one] one + [other] other + } + '''), + tuple() + ) + + def test_duplicate_variant(self): + self._test( + dedent_ftl('''\ + msg = { $val -> + *[one] one + [one] other + } + '''), + ( + ( + 'warning', 19, + 'Variant key "one" is duplicated', 'fluent' + ), + ( + 'warning', 31, + 'Variant key "one" is duplicated', 'fluent' + ), + ) + ) + + def test_term_value(self): + self._test( + dedent_ftl('''\ + -term = { PLATFORM() -> + *[one] one + [two] two + [two] duplicate + } + '''), + ( + ( + 'warning', 39, + 'Variant key "two" is duplicated', 'fluent' + ), + ( + 'warning', 51, + 'Variant key "two" is duplicated', 'fluent' + ), + ) + ) + + def test_term_attribute(self): + self._test( + dedent_ftl('''\ + -term = boring value + .attr = { PLATFORM() -> + *[one] one + [two] two + [two] duplicate + [two] three + } + '''), + ( + ( + 'warning', 66, + 'Variant key "two" is duplicated', 'fluent' + ), + ( + 'warning', 80, + 'Variant key "two" is duplicated', 'fluent' + ), + ( + 'warning', 100, + 'Variant key "two" is duplicated', 'fluent' + ), + ) + ) + + +class PluralTest(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = b'''\ +msg = { $val -> + *[other] Show something + } +''' + + def test_missing_plural(self): + self.file.locale = 'ru' + self._test( + dedent_ftl('''\ + msg = { $val -> + [one] thing + [3] is ok + *[many] stuff + } + '''), + ( + ( + 'warning', 19, + 'Plural categories missing: few', 'fluent' + ), + ) + ) + + def test_ignoring_other(self): + self.file.locale = 'de' + self._test( + dedent_ftl('''\ + msg = { $val -> + [1] thing + *[other] stuff + } + '''), + tuple() + ) + + +class CSSStyleTest(BaseHelper): + file = File('foo.ftl', 'foo.ftl') + refContent = b'''\ +simple = + .style = width:1px +select = + .style = {PLATFORM() -> + [windows] width:1px + *[unix] max-width:1px + } +ref = + .style = {simple.style} +broken = + .style = 28em +''' + + def test_simple(self): + self._test(dedent_ftl( + '''\ + simple = + .style = width:2px + '''), + tuple()) + self._test(dedent_ftl( + '''\ + simple = + .style = max-width:2px + '''), + ( + ( + 'warning', 0, + 'width only in reference, max-width only in l10n', 'fluent' + ), + )) + self._test(dedent_ftl( + '''\ + simple = + .style = stuff + '''), + ( + ( + 'error', 0, + 'reference is a CSS spec', 'fluent' + ), + )) + # Cover the current limitations of only plain strings + self._test(dedent_ftl( + '''\ + simple = + .style = {"width:3px"} + '''), + tuple()) + + def test_select(self): + self._test(dedent_ftl( + '''\ + select = + .style = width:2px + '''), + ( + ( + 'warning', 0, + 'width only in l10n', 'fluent' + ), + )) + self._test(dedent_ftl( + '''\ + select = + .style = max-width:2px + '''), + ( + ( + 'warning', 0, + 'max-width only in l10n', 'fluent' + ), + )) + self._test(dedent_ftl( + '''\ + select = + .style = stuff + '''), + ( + ( + 'error', 0, + 'reference is a CSS spec', 'fluent' + ), + )) + # Cover the current limitations of only plain strings + self._test(dedent_ftl( + '''\ + select = + .style = {"width:1px"} + '''), + tuple()) + + def test_ref(self): + self._test(dedent_ftl( + '''\ + ref = + 
.style = width:2px + '''), + ( + ( + 'warning', 0, + 'width only in l10n', 'fluent' + ), + ( + 'warning', 0, + 'Missing message reference: simple.style', 'fluent' + ), + )) + self._test(dedent_ftl( + '''\ + ref = + .style = max-width:2px + '''), + ( + ( + 'warning', 0, + 'max-width only in l10n', 'fluent' + ), + ( + 'warning', 0, + 'Missing message reference: simple.style', 'fluent' + ), + )) + self._test(dedent_ftl( + '''\ + ref = + .style = stuff + '''), + ( + ( + 'error', 0, + 'reference is a CSS spec', 'fluent' + ), + ( + 'warning', 0, + 'Missing message reference: simple.style', 'fluent' + ), + )) + # Cover the current limitations of only plain strings + self._test(dedent_ftl( + '''\ + ref = + .style = {"width:1px"} + '''), + ( + ( + 'warning', 0, + 'Missing message reference: simple.style', 'fluent' + ), + )) + + def test_broken(self): + self._test(dedent_ftl( + '''\ + broken = + .style = 27em + '''), + (('error', 0, 'reference is a CSS spec', 'fluent'),)) + self._test(dedent_ftl( + '''\ + broken = + .style = width: 27em + '''), + ( + ( + 'warning', 0, + 'width only in l10n', 'fluent' + ), + )) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/python/compare-locales/compare_locales/tests/fluent/test_merge.py b/third_party/python/compare-locales/compare_locales/tests/fluent/test_merge.py new file mode 100644 index 0000000000..41e69eca3e --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/fluent/test_merge.py @@ -0,0 +1,283 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +from compare_locales.merge import merge_channels + + +class TestMergeFluent(unittest.TestCase): + name = "foo.ftl" + + def test_no_changes(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +""") + + def test_attribute_in_first(self): + channels = (b""" +foo = Foo 1 + .attr = Attr 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + .attr = Attr 1 +""") + + def test_attribute_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 + .attr = Attr 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +""") + + def test_junk_in_first(self): + channels = (b"""\ +line of junk +""", b"""\ +one = entry +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode('utf-8'), + """\ +one = entry +line of junk +""" + ) + + def test_junk_in_last(self): + channels = (b"""\ +one = entry +""", b"""\ +line of junk +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode('utf-8'), + """\ +line of junk +one = entry +""" + ) + + def test_attribute_changed(self): + channels = (b""" +foo = Foo 1 + .attr = Attr 1 +""", b""" +foo = Foo 2 + .attr = Attr 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + .attr = Attr 1 +""") + + def test_group_comment_in_first(self): + channels = (b""" +## Group Comment 1 +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +## Group Comment 1 +foo = Foo 1 +""") + + def test_group_comment_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +## Group Comment 2 +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +## Group Comment 
2 +foo = Foo 1 +""") + + def test_group_comment_changed(self): + channels = (b""" +## Group Comment 1 +foo = Foo 1 +""", b""" +## Group Comment 2 +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +## Group Comment 2 +## Group Comment 1 +foo = Foo 1 +""") + + def test_group_comment_and_section(self): + channels = (b""" +## Group Comment +foo = Foo 1 +""", b""" +// Section Comment +[[ Section ]] +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +// Section Comment +[[ Section ]] +## Group Comment +foo = Foo 1 +""") + + def test_message_comment_in_first(self): + channels = (b""" +# Comment 1 +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +# Comment 1 +foo = Foo 1 +""") + + def test_message_comment_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +# Comment 2 +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +""") + + def test_message_comment_changed(self): + channels = (b""" +# Comment 1 +foo = Foo 1 +""", b""" +# Comment 2 +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +# Comment 1 +foo = Foo 1 +""") + + def test_standalone_comment_in_first(self): + channels = (b""" +foo = Foo 1 + +# Comment 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + +# Comment 1 +""") + + def test_standalone_comment_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 + +# Comment 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + +# Comment 2 +""") + + def test_standalone_comment_changed(self): + channels = (b""" +foo = Foo 1 + +# Comment 1 +""", b""" +foo = Foo 2 + +# Comment 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + +# Comment 2 + +# Comment 1 +""") + + def test_resource_comment_in_first(self): + channels = (b""" +### Resource Comment 1 + +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +### Resource Comment 1 + +foo = Foo 1 +""") + + def test_resource_comment_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +### Resource Comment 1 + +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +### Resource Comment 1 + +foo = Foo 1 +""") + + def test_resource_comment_changed(self): + channels = (b""" +### Resource Comment 1 + +foo = Foo 1 +""", b""" +### Resource Comment 2 + +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +### Resource Comment 2 + +### Resource Comment 1 + +foo = Foo 1 +""") diff --git a/third_party/python/compare-locales/compare_locales/tests/fluent/test_parser.py b/third_party/python/compare-locales/compare_locales/tests/fluent/test_parser.py new file mode 100644 index 0000000000..db767fd5e2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/fluent/test_parser.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import +import unittest + +from compare_locales import parser +from compare_locales.tests import ParserTestMixin + + +class TestFluentParser(ParserTestMixin, unittest.TestCase): + maxDiff = None + filename = 'foo.ftl' + + def test_equality_same(self): + source = b'progress = Progress: { NUMBER($num, style: "percent") }.' + + self.parser.readContents(source) + [ent1] = list(self.parser) + + self.parser.readContents(source) + [ent2] = list(self.parser) + + self.assertTrue(ent1.equals(ent2)) + self.assertTrue(ent1.localized) + + def test_equality_different_whitespace(self): + source1 = b'foo = { $arg }' + source2 = b'foo = { $arg }' + + self.parser.readContents(source1) + [ent1] = list(self.parser) + + self.parser.readContents(source2) + [ent2] = list(self.parser) + + self.assertTrue(ent1.equals(ent2)) + + def test_word_count(self): + self.parser.readContents(b'''\ +a = One +b = One two three +c = One { $arg } two +d = + One { $arg -> + *[x] Two three + [y] Four + } five. +e = + .attr = One +f = + .attr1 = One + .attr2 = Two +g = One two + .attr = Three +h = + One { $arg -> + *[x] Two three + [y] Four + } five. + .attr1 = + Six { $arg -> + *[x] Seven eight + [y] Nine + } ten. +-i = One + .prop = Do not count +''') + + a, b, c, d, e, f, g, h, i = list(self.parser) + self.assertEqual(a.count_words(), 1) + self.assertEqual(b.count_words(), 3) + self.assertEqual(c.count_words(), 2) + self.assertEqual(d.count_words(), 5) + self.assertEqual(e.count_words(), 1) + self.assertEqual(f.count_words(), 2) + self.assertEqual(g.count_words(), 3) + self.assertEqual(h.count_words(), 10) + self.assertEqual(i.count_words(), 1) + + def test_simple_message(self): + self.parser.readContents(b'a = A') + + [a] = list(self.parser) + self.assertEqual(a.key, 'a') + self.assertEqual(a.raw_val, 'A') + self.assertEqual(a.all, 'a = A') + attributes = list(a.attributes) + self.assertEqual(len(attributes), 0) + + def test_complex_message(self): + self.parser.readContents(b'abc = A { $arg } B { msg } C') + + [abc] = list(self.parser) + self.assertEqual(abc.key, 'abc') + self.assertEqual(abc.raw_val, 'A { $arg } B { msg } C') + self.assertEqual(abc.all, 'abc = A { $arg } B { msg } C') + + def test_multiline_message(self): + self.parser.readContents(b'''\ +abc = + A + B + C +''') + + [abc] = list(self.parser) + self.assertEqual(abc.key, 'abc') + self.assertEqual(abc.raw_val, ' A\n B\n C') + self.assertEqual(abc.all, 'abc =\n A\n B\n C') + + def test_message_with_attribute(self): + self.parser.readContents(b'''\ + + +abc = ABC + .attr = Attr +''') + + [abc] = list(self.parser) + self.assertEqual(abc.key, 'abc') + self.assertEqual(abc.raw_val, 'ABC') + self.assertEqual(abc.all, 'abc = ABC\n .attr = Attr') + self.assertEqual(abc.position(), (3, 1)) + self.assertEqual(abc.value_position(), (3, 7)) + attr = list(abc.attributes)[0] + self.assertEqual(attr.value_position(), (4, 13)) + + def test_message_with_attribute_and_no_value(self): + self.parser.readContents(b'''\ +abc = + .attr = Attr +''') + + [abc] = list(self.parser) + self.assertEqual(abc.key, 'abc') + self.assertEqual(abc.raw_val, None) + self.assertEqual(abc.all, 'abc =\n .attr = Attr') + attributes = list(abc.attributes) + self.assertEqual(len(attributes), 1) + attr = attributes[0] + self.assertEqual(attr.key, 'attr') + self.assertEqual(attr.raw_val, 'Attr') + self.assertEqual(abc.value_position(), (1, 4)) + self.assertEqual(attr.value_position(), (2, 13)) + + def test_non_localizable(self): + self.parser.readContents(b'''\ +### 
Resource Comment + +foo = Foo + +## Group Comment + +-bar = Bar + +## + +# Standalone Comment + +# Baz Comment +baz = Baz +''') + entities = self.parser.walk() + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentComment)) + self.assertEqual(entity.all, '### Resource Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentMessage)) + self.assertEqual(entity.raw_val, 'Foo') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentComment)) + self.assertEqual(entity.all, '## Group Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentTerm)) + self.assertEqual(entity.raw_val, 'Bar') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentComment)) + self.assertEqual(entity.all, '##') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentComment)) + self.assertEqual(entity.all, '# Standalone Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentMessage)) + self.assertEqual(entity.raw_val, 'Baz') + self.assertEqual(entity.entry.comment.content, 'Baz Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n') + + with self.assertRaises(StopIteration): + next(entities) + + def test_comments_val(self): + self.parser.readContents(b'''\ +// Legacy Comment + +### Resource Comment + +## Section Comment + +# Standalone Comment +''') + entities = self.parser.walk() + + entity = next(entities) + # ensure that fluent comments are FluentComments and Comments + # Legacy comments (//) are Junk + self.assertTrue(isinstance(entity, parser.Junk)) + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Comment)) + self.assertEqual(entity.val, 'Resource Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Comment)) + self.assertEqual(entity.val, 'Section Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Comment)) + self.assertEqual(entity.val, 'Standalone Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.all, '\n') + + with self.assertRaises(StopIteration): + next(entities) + + def test_junk(self): + self.parser.readUnicode('''\ +# Comment + +Line of junk + +# Comment +msg = value +''') + entities = self.parser.walk() + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentComment)) + 
self.assertEqual(entity.val, 'Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.raw_val, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Junk)) + self.assertEqual(entity.raw_val, 'Line of junk') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.raw_val, '\n\n') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.FluentEntity)) + self.assertEqual(entity.raw_val, 'value') + self.assertEqual(entity.entry.comment.content, 'Comment') + + entity = next(entities) + self.assertTrue(isinstance(entity, parser.Whitespace)) + self.assertEqual(entity.raw_val, '\n') + + with self.assertRaises(StopIteration): + next(entities) diff --git a/third_party/python/compare-locales/compare_locales/tests/lint/__init__.py b/third_party/python/compare-locales/compare_locales/tests/lint/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/lint/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/lint/test_linter.py b/third_party/python/compare-locales/compare_locales/tests/lint/test_linter.py new file mode 100644 index 0000000000..9abdc57c08 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/lint/test_linter.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest + +from compare_locales.lint import linter +from compare_locales.parser import base as parser + + +class MockChecker(object): + def __init__(self, mocked): + self.results = mocked + + def check(self, ent, ref): + for r in self.results: + yield r + + +class EntityTest(unittest.TestCase): + def test_junk(self): + el = linter.EntityLinter([], None, {}) + ctx = parser.Parser.Context('foo\nbar\n') + ent = parser.Junk(ctx, (4, 7)) + res = el.handle_junk(ent) + self.assertIsNotNone(res) + self.assertEqual(res['lineno'], 2) + self.assertEqual(res['column'], 1) + ent = parser.LiteralEntity('one', 'two', 'one = two') + self.assertIsNone(el.handle_junk(ent)) + + def test_full_entity(self): + ctx = parser.Parser.Context('''\ +one = two +two = three +one = four +''') + entities = [ + parser.Entity(ctx, None, None, (0, 10), (0, 3), (6, 9)), + parser.Entity(ctx, None, None, (10, 22), (10, 13), (16, 21)), + parser.Entity(ctx, None, None, (22, 33), (22, 25), (28, 32)), + ] + self.assertEqual( + (entities[0].all, entities[0].key, entities[0].val), + ('one = two\n', 'one', 'two') + ) + self.assertEqual( + (entities[1].all, entities[1].key, entities[1].val), + ('two = three\n', 'two', 'three') + ) + self.assertEqual( + (entities[2].all, entities[2].key, entities[2].val), + ('one = four\n', 'one', 'four') + ) + el = linter.EntityLinter(entities, None, {}) + results = list(el.lint_full_entity(entities[1])) + self.assertListEqual(results, []) + results = list(el.lint_full_entity(entities[2])) + self.assertEqual(len(results), 1) + result = results[0] + self.assertEqual(result['level'], 'error') + self.assertEqual(result['lineno'], 3) + self.assertEqual(result['column'], 1) + # finally check for conflict + el.reference = { + 'two': parser.LiteralEntity('two = other', 'two', 'other') + } + results = 
list(el.lint_full_entity(entities[1])) + self.assertEqual(len(results), 1) + result = results[0] + self.assertEqual(result['level'], 'warning') + self.assertEqual(result['lineno'], 2) + self.assertEqual(result['column'], 1) + + def test_in_value(self): + ctx = parser.Parser.Context('''\ +one = two +''') + entities = [ + parser.Entity(ctx, None, None, (0, 10), (0, 3), (6, 9)), + ] + self.assertEqual( + (entities[0].all, entities[0].key, entities[0].val), + ('one = two\n', 'one', 'two') + ) + checker = MockChecker([ + ('error', 2, 'Incompatible resource types', 'android'), + ]) + el = linter.EntityLinter(entities, checker, {}) + results = list(el.lint_value(entities[0])) + self.assertEqual(len(results), 1) + result = results[0] + self.assertEqual(result['level'], 'error') + self.assertEqual(result['lineno'], 1) + self.assertEqual(result['column'], 9) diff --git a/third_party/python/compare-locales/compare_locales/tests/lint/test_util.py b/third_party/python/compare-locales/compare_locales/tests/lint/test_util.py new file mode 100644 index 0000000000..2a8d30bf2a --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/lint/test_util.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import + +import unittest + +from compare_locales.lint import util +from compare_locales.paths.project import ProjectConfig +from compare_locales.paths.files import ProjectFiles +from compare_locales import mozpath + + +class MirrorReferenceTest(unittest.TestCase): + def test_empty(self): + files = ProjectFiles(None, []) + get_reference_and_tests = util.mirror_reference_and_tests(files, 'tld') + ref, tests = get_reference_and_tests('some/path/file.ftl') + self.assertIsNone(ref) + self.assertIsNone(tests) + + def test_no_tests(self): + pc = ProjectConfig(None) + pc.add_paths({ + 'reference': 'some/path/file.ftl', + 'l10n': 'some/{locale}/file.ftl', + }) + files = ProjectFiles(None, [pc]) + get_reference_and_tests = util.mirror_reference_and_tests(files, 'tld') + ref, tests = get_reference_and_tests('some/path/file.ftl') + self.assertEqual(mozpath.relpath(ref, 'tld'), 'some/path/file.ftl') + self.assertEqual(tests, set()) + + def test_with_tests(self): + pc = ProjectConfig(None) + pc.add_paths({ + 'reference': 'some/path/file.ftl', + 'l10n': 'some/{locale}/file.ftl', + 'test': ['more_stuff'], + }) + files = ProjectFiles(None, [pc]) + get_reference_and_tests = util.mirror_reference_and_tests(files, 'tld') + ref, tests = get_reference_and_tests('some/path/file.ftl') + self.assertEqual(mozpath.relpath(ref, 'tld'), 'some/path/file.ftl') + self.assertEqual(tests, {'more_stuff'}) + + +class L10nBaseReferenceTest(unittest.TestCase): + def test_empty(self): + files = ProjectFiles(None, []) + get_reference_and_tests = util.l10n_base_reference_and_tests(files) + ref, tests = get_reference_and_tests('some/path/file.ftl') + self.assertIsNone(ref) + self.assertIsNone(tests) + + def test_no_tests(self): + pc = ProjectConfig(None) + pc.add_environment(l10n_base='l10n_orig') + pc.add_paths({ + 'reference': 'some/path/file.ftl', + 'l10n': '{l10n_base}/{locale}/some/file.ftl', + }) + pc.set_locales(['gecko'], deep=True) + files = ProjectFiles('gecko', [pc]) + get_reference_and_tests = util.l10n_base_reference_and_tests(files) + ref, tests = get_reference_and_tests('some/path/file.ftl') + 
self.assertEqual( + mozpath.relpath(ref, 'l10n_orig/gecko'), + 'some/file.ftl' + ) + self.assertEqual(tests, set()) + + def test_with_tests(self): + pc = ProjectConfig(None) + pc.add_environment(l10n_base='l10n_orig') + pc.add_paths({ + 'reference': 'some/path/file.ftl', + 'l10n': '{l10n_base}/{locale}/some/file.ftl', + 'test': ['more_stuff'], + }) + pc.set_locales(['gecko'], deep=True) + files = ProjectFiles('gecko', [pc]) + get_reference_and_tests = util.l10n_base_reference_and_tests(files) + ref, tests = get_reference_and_tests('some/path/file.ftl') + self.assertEqual( + mozpath.relpath(ref, 'l10n_orig/gecko'), + 'some/file.ftl' + ) + self.assertEqual(tests, {'more_stuff'}) diff --git a/third_party/python/compare-locales/compare_locales/tests/merge/__init__.py b/third_party/python/compare-locales/compare_locales/tests/merge/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/merge/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/merge/test_comments.py b/third_party/python/compare-locales/compare_locales/tests/merge/test_comments.py new file mode 100644 index 0000000000..71241c8768 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/merge/test_comments.py @@ -0,0 +1,188 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +from compare_locales.merge import merge_channels + + +class TestMergeComments(unittest.TestCase): + name = "foo.properties" + + def test_comment_added_in_first(self): + channels = (b""" +foo = Foo 1 +# Bar Comment 1 +bar = Bar 1 +""", b""" +foo = Foo 2 +bar = Bar 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +foo = Foo 1 +# Bar Comment 1 +bar = Bar 1 +""") + + def test_comment_still_in_last(self): + channels = (b""" +foo = Foo 1 +bar = Bar 1 +""", b""" +foo = Foo 2 +# Bar Comment 2 +bar = Bar 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +foo = Foo 1 +bar = Bar 1 +""") + + def test_comment_changed(self): + channels = (b""" +foo = Foo 1 +# Bar Comment 1 +bar = Bar 1 +""", b""" +foo = Foo 2 +# Bar Comment 2 +bar = Bar 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +foo = Foo 1 +# Bar Comment 1 +bar = Bar 1 +""") + + +class TestMergeStandaloneComments(unittest.TestCase): + name = "foo.properties" + + def test_comment_added_in_first(self): + channels = (b""" +# Standalone Comment 1 + +# Foo Comment 1 +foo = Foo 1 +""", b""" +# Foo Comment 2 +foo = Foo 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +# Standalone Comment 1 + +# Foo Comment 1 +foo = Foo 1 +""") + + def test_comment_still_in_last(self): + channels = (b""" +# Foo Comment 1 +foo = Foo 1 +""", b""" +# Standalone Comment 2 + +# Foo Comment 2 +foo = Foo 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +# Standalone Comment 2 + +# Foo Comment 1 +foo = Foo 1 +""") + + def test_comments_in_both(self): + channels = (b""" +# Standalone Comment 1 + +# Foo Comment 1 +foo = Foo 1 +""", b""" +# Standalone Comment 2 + +# Foo Comment 2 +foo = Foo 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +# Standalone Comment 2 + +# 
Standalone Comment 1 + +# Foo Comment 1 +foo = Foo 1 +""") + + def test_identical_comments_in_both(self): + channels = (b""" +# Standalone Comment + +# Foo Comment 1 +foo = Foo 1 +""", b""" +# Standalone Comment + +# Foo Comment 2 +foo = Foo 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +# Standalone Comment + +# Foo Comment 1 +foo = Foo 1 +""") + + def test_standalone_which_is_attached_in_first(self): + channels = (b""" +# Ambiguous Comment +foo = Foo 1 + +# Bar Comment 1 +bar = Bar 1 +""", b""" +# Ambiguous Comment + +# Bar Comment 2 +bar = Bar 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +# Ambiguous Comment + +# Ambiguous Comment +foo = Foo 1 + +# Bar Comment 1 +bar = Bar 1 +""") + + def test_standalone_which_is_attached_in_second(self): + channels = (b""" +# Ambiguous Comment + +# Bar Comment 1 +bar = Bar 1 +""", b""" +# Ambiguous Comment +foo = Foo 1 + +# Bar Comment 2 +bar = Bar 2 +""") + self.assertMultiLineEqual( + merge_channels(self.name, channels).decode("utf-8"), """ +# Ambiguous Comment +foo = Foo 1 + +# Ambiguous Comment + +# Bar Comment 1 +bar = Bar 1 +""") diff --git a/third_party/python/compare-locales/compare_locales/tests/merge/test_messages.py b/third_party/python/compare-locales/compare_locales/tests/merge/test_messages.py new file mode 100644 index 0000000000..664bbd16c5 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/merge/test_messages.py @@ -0,0 +1,93 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +from compare_locales.merge import merge_channels + + +class TestMergeTwo(unittest.TestCase): + name = "foo.properties" + + def test_no_changes(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +""") + + def test_message_added_in_first(self): + channels = (b""" +foo = Foo 1 +bar = Bar 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +bar = Bar 1 +""") + + def test_message_still_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +bar = Bar 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +bar = Bar 2 +""") + + def test_message_reordered(self): + channels = (b""" +foo = Foo 1 +bar = Bar 1 +""", b""" +bar = Bar 2 +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +bar = Bar 1 +""") + + +class TestMergeThree(unittest.TestCase): + name = "foo.properties" + + def test_no_changes(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +""", b""" +foo = Foo 3 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +""") + + def test_message_still_in_last(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +""", b""" +foo = Foo 3 +bar = Bar 3 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +bar = Bar 3 +""") diff --git a/third_party/python/compare-locales/compare_locales/tests/merge/test_unknown.py b/third_party/python/compare-locales/compare_locales/tests/merge/test_unknown.py new file mode 100644 index 0000000000..ce74e1a10b --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/merge/test_unknown.py @@ -0,0 +1,22 @@ +# This 
Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +import six + +from compare_locales.merge import merge_channels, MergeNotSupportedError + + +class TestMergeUnknown(unittest.TestCase): + name = "foo.unknown" + + def test_not_supported_error(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + pattern = r"Unsupported file format \(foo\.unknown\)\." + with six.assertRaisesRegex(self, MergeNotSupportedError, pattern): + merge_channels(self.name, channels) diff --git a/third_party/python/compare-locales/compare_locales/tests/merge/test_whitespace.py b/third_party/python/compare-locales/compare_locales/tests/merge/test_whitespace.py new file mode 100644 index 0000000000..adaedc70d1 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/merge/test_whitespace.py @@ -0,0 +1,76 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +from compare_locales.merge import merge_channels + + +class TestMergeWhitespace(unittest.TestCase): + name = "foo.properties" + + def test_trailing_spaces(self): + channels = (b""" +foo = Foo 1 + """, b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + """) + + def test_blank_lines_between_messages(self): + channels = (b""" +foo = Foo 1 + +bar = Bar 1 +""", b""" +foo = Foo 2 +bar = Bar 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 + +bar = Bar 1 +""") + + def test_no_eol(self): + channels = (b""" +foo = Foo 1""", b""" +foo = Foo 2 +bar = Bar 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +bar = Bar 2 +""") + + def test_still_in_last_with_blank(self): + channels = (b""" + +foo = Foo 1 + +baz = Baz 1 + +""", b""" + +foo = Foo 2 + +bar = Bar 2 + +baz = Baz 2 + +""") + self.assertEqual( + merge_channels(self.name, channels), b""" + +foo = Foo 1 + +bar = Bar 2 + +baz = Baz 1 + +""") diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/__init__.py b/third_party/python/compare-locales/compare_locales/tests/paths/__init__.py new file mode 100644 index 0000000000..1a99c53e2f --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/__init__.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import + +from collections import defaultdict +import six +import tempfile +from compare_locales.paths import ( + ProjectConfig, File, ProjectFiles, TOMLParser +) +from compare_locales import mozpath +import pytoml as toml + + +class Rooted(object): + def setUp(self): + # Use tempdir as self.root, that's absolute on all platforms + self.root = mozpath.normpath(tempfile.gettempdir()) + + def path(self, leaf=''): + return self.root + leaf + + def leaf(self, path): + return mozpath.relpath(path, self.root) + + +class SetupMixin(object): + def setUp(self): + self.cfg = ProjectConfig(None) + self.file = File( + '/tmp/somedir/de/browser/one/two/file.ftl', + 'file.ftl', + module='browser', locale='de') + self.other_file = File( + '/tmp/somedir/de/toolkit/two/one/file.ftl', + 'file.ftl', + module='toolkit', locale='de') + self.cfg.set_locales(['de']) + + +class MockOS(object): + '''Mock `os.path.isfile` and `os.walk` based on a list of files. + ''' + def __init__(self, root, paths): + self.root = root + self.files = [] + self.dirs = {} + if not paths: + return + if isinstance(paths[0], six.string_types): + paths = [ + mozpath.split(path) + for path in sorted(paths) + ] + child_paths = defaultdict(list) + for segs in paths: + if len(segs) == 1: + self.files.append(segs[0]) + else: + child_paths[segs[0]].append(segs[1:]) + for root, leafs in child_paths.items(): + self.dirs[root] = MockOS(mozpath.join(self.root, root), leafs) + + def find(self, dir_path): + relpath = mozpath.relpath(dir_path, self.root) + if relpath.startswith('..'): + return None + if relpath in ('', '.'): + return self + segs = mozpath.split(relpath) + node = self + while segs: + seg = segs.pop(0) + if seg not in node.dirs: + return None + node = node.dirs[seg] + return node + + def isfile(self, path): + dirname = mozpath.dirname(path) + if dirname: + node = self.find(dirname) + else: + node = self + return node and mozpath.basename(path) in node.files + + def walk(self, path=None): + if path is None: + node = self + else: + node = self.find(path) + if node is None: + return + subdirs = sorted(node.dirs) + if node.root is not None: + yield node.root, subdirs, node.files + for subdir in subdirs: + child = node.dirs[subdir] + for tpl in child.walk(): + yield tpl + + +class MockProjectFiles(ProjectFiles): + def __init__(self, mocks, locale, projects, mergebase=None): + (super(MockProjectFiles, self) + .__init__(locale, projects, mergebase=mergebase)) + root = mozpath.commonprefix(mocks) + files = [mozpath.relpath(f, root) for f in mocks] + self.mocks = MockOS(root, files) + + def _isfile(self, path): + return self.mocks.isfile(path) + + def _walk(self, base): + base = mozpath.normpath(base) + root = self.mocks.find(base) + if not root: + return + for tpl in root.walk(): + yield tpl + + +class MockTOMLParser(TOMLParser): + def __init__(self, mock_data): + self.mock_data = mock_data + + def load(self, ctx): + p = mozpath.basename(ctx.path) + ctx.data = toml.loads(self.mock_data[p]) diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/test_configparser.py b/third_party/python/compare-locales/compare_locales/tests/paths/test_configparser.py new file mode 100644 index 0000000000..fe9d7dcf6e --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/test_configparser.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, unicode_literals +import unittest +import six + +from . import MockTOMLParser +from compare_locales.paths.matcher import Matcher +from compare_locales.paths.project import ProjectConfig, ExcludeError +from compare_locales import mozpath + + +class TestConfigParser(unittest.TestCase): + def test_includes(self): + parser = MockTOMLParser({ + "root.toml": """ +basepath = "." +[env] + o = "toolkit" +[[includes]] + path = "{o}/other.toml" +[[includes]] + path = "dom/more.toml" +""", + "other.toml": """ +basepath = "." +""", + "more.toml": """ +basepath = "." +""" + }) + config = parser.parse("root.toml") + self.assertIsInstance(config, ProjectConfig) + configs = list(config.configs) + self.assertEqual(configs[0], config) + self.assertListEqual( + [c.path for c in configs], + [ + "root.toml", + mozpath.abspath("toolkit/other.toml"), + mozpath.abspath("dom/more.toml"), + ] + ) + + def test_excludes(self): + parser = MockTOMLParser({ + "root.toml": """ +basepath = "." +[[excludes]] + path = "exclude.toml" +[[excludes]] + path = "other-exclude.toml" + """, + "exclude.toml": """ +basepath = "." +""", + "other-exclude.toml": """ +basepath = "." +""", + "grandparent.toml": """ +basepath = "." +[[includes]] + path = "root.toml" +""", + "wrapped.toml": """ +basepath = "." +[[excludes]] + path = "root.toml" + """ + }) + config = parser.parse("root.toml") + self.assertIsInstance(config, ProjectConfig) + configs = list(config.configs) + self.assertListEqual(configs, [config]) + self.assertEqual( + [c.path for c in config.excludes], + [ + mozpath.abspath("exclude.toml"), + mozpath.abspath("other-exclude.toml"), + ] + ) + with six.assertRaisesRegex(self, ExcludeError, 'Included configs'): + parser.parse("grandparent.toml") + with six.assertRaisesRegex(self, ExcludeError, 'Excluded configs'): + parser.parse("wrapped.toml") + + def test_paths(self): + parser = MockTOMLParser({ + "l10n.toml": """ +[[paths]] + l10n = "some/{locale}/*" +""", + "ref.toml": """ +[[paths]] + reference = "ref/l10n/*" + l10n = "some/{locale}/*" +""", + "tests.toml": """ +[[paths]] + l10n = "some/{locale}/*" + test = [ + "run_this", + ] +""", + }) + + paths = parser.parse("l10n.toml").paths + self.assertIn("l10n", paths[0]) + self.assertIsInstance(paths[0]["l10n"], Matcher) + self.assertNotIn("reference", paths[0]) + self.assertNotIn("test", paths[0]) + paths = parser.parse("ref.toml").paths + self.assertIn("l10n", paths[0]) + self.assertIsInstance(paths[0]["l10n"], Matcher) + self.assertIn("reference", paths[0]) + self.assertIsInstance(paths[0]["reference"], Matcher) + self.assertNotIn("test", paths[0]) + paths = parser.parse("tests.toml").paths + self.assertIn("l10n", paths[0]) + self.assertIsInstance(paths[0]["l10n"], Matcher) + self.assertNotIn("reference", paths[0]) + self.assertIn("test", paths[0]) + self.assertListEqual(paths[0]["test"], ["run_this"]) diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/test_files.py b/third_party/python/compare-locales/compare_locales/tests/paths/test_files.py new file mode 100644 index 0000000000..997d7d2ffc --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/test_files.py @@ -0,0 +1,572 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest +import mock + +from compare_locales.paths import ( + File, + ProjectConfig, + ProjectFiles, +) +from . import ( + MockOS, + MockProjectFiles, + MockTOMLParser, + Rooted, +) + + +class TestMockOS(Rooted, unittest.TestCase): + def setUp(self): + self.node = MockOS('jazz', [ + 'one/bit', + 'two/deep/in/directories/with/file1', + 'two/deep/in/directories/with/file2', + 'three/feet', + ]) + + def test_isfile(self): + self.assertTrue(self.node.isfile('jazz/one/bit')) + self.assertFalse(self.node.isfile('jazz/one')) + self.assertFalse(self.node.isfile('foo')) + + def test_walk(self): + self.assertListEqual( + list(self.node.walk()), + [ + ('jazz', ['one', 'three', 'two'], []), + ('jazz/one', [], ['bit']), + ('jazz/three', [], ['feet']), + ('jazz/two', ['deep'], []), + ('jazz/two/deep', ['in'], []), + ('jazz/two/deep/in', ['directories'], []), + ('jazz/two/deep/in/directories', ['with'], []), + ('jazz/two/deep/in/directories/with', [], [ + 'file1', + 'file2', + ]), + ] + ) + + def test_find(self): + self.assertIsNone(self.node.find('foo')) + self.assertIsNone(self.node.find('jazz/one/bit')) + self.assertIsNone(self.node.find('jazz/one/bit/too/much')) + self.assertIsNotNone(self.node.find('jazz/one')) + self.assertListEqual(list(self.node.find('jazz/one').walk()), [ + ('jazz/one', [], ['bit']), + ]) + self.assertEqual(self.node.find('jazz'), self.node) + + +class TestProjectPaths(Rooted, unittest.TestCase): + def test_l10n_path(self): + cfg = ProjectConfig(None) + cfg.add_environment(l10n_base=self.root) + cfg.set_locales(['de']) + cfg.add_paths({ + 'l10n': '{l10n_base}/{locale}/*' + }) + mocks = [ + self.path(leaf) + for leaf in ( + '/de/good.ftl', + '/de/not/subdir/bad.ftl', + '/fr/good.ftl', + '/fr/not/subdir/bad.ftl', + ) + ] + files = MockProjectFiles(mocks, 'de', [cfg]) + self.assertListEqual( + list(files), + [ + (self.path('/de/good.ftl'), None, None, set()) + ] + ) + self.assertTupleEqual( + files.match(self.path('/de/good.ftl')), + (self.path('/de/good.ftl'), None, None, set()) + ) + self.assertIsNone(files.match(self.path('/fr/something.ftl'))) + files = MockProjectFiles(mocks, 'de', [cfg], mergebase='merging') + self.assertListEqual( + list(files), + [ + (self.path('/de/good.ftl'), None, 'merging/de/good.ftl', set()) + ] + ) + self.assertTupleEqual( + files.match(self.path('/de/something.ftl')), + (self.path('/de/something.ftl'), + None, + 'merging/de/something.ftl', + set())) + # 'fr' is not in the locale list, should return no files + files = MockProjectFiles(mocks, 'fr', [cfg]) + self.assertListEqual(list(files), []) + + def test_single_reference_path(self): + cfg = ProjectConfig(None) + cfg.add_environment(l10n_base=self.path('/l10n')) + cfg.set_locales(['de']) + cfg.add_paths({ + 'l10n': '{l10n_base}/{locale}/good.ftl', + 'reference': self.path('/reference/good.ftl') + }) + mocks = [ + self.path('/reference/good.ftl'), + self.path('/reference/not/subdir/bad.ftl'), + ] + files = MockProjectFiles(mocks, 'de', [cfg]) + self.assertListEqual( + list(files), + [ + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + None, + set()), + ]) + self.assertTupleEqual( + files.match(self.path('/reference/good.ftl')), + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + None, + set()), + ) + self.assertTupleEqual( + files.match(self.path('/l10n/de/good.ftl')), + 
(self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + None, + set()), + ) + + def test_reference_path(self): + cfg = ProjectConfig(None) + cfg.add_environment(l10n_base=self.path('/l10n')) + cfg.set_locales(['de']) + cfg.add_paths({ + 'l10n': '{l10n_base}/{locale}/*', + 'reference': self.path('/reference/*') + }) + mocks = [ + self.path(leaf) + for leaf in [ + '/l10n/de/good.ftl', + '/l10n/de/not/subdir/bad.ftl', + '/l10n/fr/good.ftl', + '/l10n/fr/not/subdir/bad.ftl', + '/reference/ref.ftl', + '/reference/not/subdir/bad.ftl', + ] + ] + files = MockProjectFiles(mocks, 'de', [cfg]) + self.assertListEqual( + list(files), + [ + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + None, + set()), + (self.path('/l10n/de/ref.ftl'), + self.path('/reference/ref.ftl'), + None, + set()), + ]) + self.assertTupleEqual( + files.match(self.path('/l10n/de/good.ftl')), + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + None, + set()), + ) + self.assertTupleEqual( + files.match(self.path('/reference/good.ftl')), + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + None, + set()), + ) + self.assertIsNone(files.match(self.path('/l10n/de/subdir/bad.ftl'))) + self.assertIsNone(files.match(self.path('/reference/subdir/bad.ftl'))) + files = MockProjectFiles(mocks, 'de', [cfg], mergebase='merging') + self.assertListEqual( + list(files), + [ + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + 'merging/de/good.ftl', set()), + (self.path('/l10n/de/ref.ftl'), + self.path('/reference/ref.ftl'), + 'merging/de/ref.ftl', set()), + ]) + self.assertTupleEqual( + files.match(self.path('/l10n/de/good.ftl')), + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + 'merging/de/good.ftl', set()), + ) + self.assertTupleEqual( + files.match(self.path('/reference/good.ftl')), + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + 'merging/de/good.ftl', set()), + ) + # 'fr' is not in the locale list, should return no files + files = MockProjectFiles(mocks, 'fr', [cfg]) + self.assertListEqual(list(files), []) + + def test_partial_l10n(self): + cfg = ProjectConfig(None) + cfg.set_locales(['de', 'fr']) + cfg.add_paths({ + 'l10n': self.path('/{locale}/major/*') + }, { + 'l10n': self.path('/{locale}/minor/*'), + 'locales': ['de'] + }) + mocks = [ + self.path(leaf) + for leaf in [ + '/de/major/good.ftl', + '/de/major/not/subdir/bad.ftl', + '/de/minor/good.ftl', + '/fr/major/good.ftl', + '/fr/major/not/subdir/bad.ftl', + '/fr/minor/good.ftl', + ] + ] + files = MockProjectFiles(mocks, 'de', [cfg]) + self.assertListEqual( + list(files), + [ + (self.path('/de/major/good.ftl'), None, None, set()), + (self.path('/de/minor/good.ftl'), None, None, set()), + ]) + self.assertTupleEqual( + files.match(self.path('/de/major/some.ftl')), + (self.path('/de/major/some.ftl'), None, None, set())) + self.assertIsNone(files.match(self.path('/de/other/some.ftl'))) + # 'fr' is not in the locale list of minor, should only return major + files = MockProjectFiles(mocks, 'fr', [cfg]) + self.assertListEqual( + list(files), + [ + (self.path('/fr/major/good.ftl'), None, None, set()), + ]) + self.assertIsNone(files.match(self.path('/fr/minor/some.ftl'))) + + def test_validation_mode(self): + cfg = ProjectConfig(None) + cfg.add_environment(l10n_base=self.path('/l10n')) + cfg.set_locales(['de']) + cfg.add_paths({ + 'l10n': '{l10n_base}/{locale}/*', + 'reference': self.path('/reference/*') + }) + mocks = [ + self.path(leaf) + for leaf in [ 
+ '/l10n/de/good.ftl', + '/l10n/de/not/subdir/bad.ftl', + '/l10n/fr/good.ftl', + '/l10n/fr/not/subdir/bad.ftl', + '/reference/ref.ftl', + '/reference/not/subdir/bad.ftl', + ] + ] + # `None` switches on validation mode + files = MockProjectFiles(mocks, None, [cfg]) + self.assertListEqual( + list(files), + [ + (self.path('/reference/ref.ftl'), + self.path('/reference/ref.ftl'), + None, + set()), + ]) + + +@mock.patch('os.path.isfile') +@mock.patch('os.walk') +class TestExcludes(Rooted, unittest.TestCase): + def _list(self, locale, _walk, _isfile): + parser = MockTOMLParser({ + "pontoon.toml": + '''\ +basepath = "." + +[[includes]] + path = "configs-pontoon.toml" + +[[excludes]] + path = "configs-vendor.toml" +[[excludes]] + path = "configs-special-templates.toml" +''', + "vendor.toml": + '''\ +basepath = "." + +[[includes]] + path = "configs-vendor.toml" + +[[excludes]] + path = "configs-special-templates.toml" +''', + "configs-pontoon.toml": + '''\ +basepath = "." + +locales = [ + "de", + "gd", + "it", +] + +[[paths]] + reference = "en/**/*.ftl" + l10n = "{locale}/**/*.ftl" +''', + "configs-vendor.toml": + '''\ +basepath = "." + +locales = [ + "de", + "it", +] + +[[paths]] + reference = "en/firefox/*.ftl" + l10n = "{locale}/firefox/*.ftl" +''', + "configs-special-templates.toml": + '''\ +basepath = "." + +[[paths]] + reference = "en/firefox/home.ftl" + l10n = "{locale}/firefox/home.ftl" + locales = [ + "de", + "fr", + ] +[[paths]] + reference = "en/firefox/pagina.ftl" + l10n = "{locale}/firefox/pagina.ftl" + locales = [ + "gd", + ] +''', + }) + pontoon = parser.parse(self.path('/pontoon.toml')) + vendor = parser.parse(self.path('/vendor.toml')) + pc = ProjectFiles(locale, [pontoon, vendor]) + mock_files = [ + '{}/{}/{}'.format(locale, dir, f) + for locale in ('de', 'en', 'gd', 'it') + for dir, files in ( + ('firefox', ('home.ftl', 'feature.ftl')), + ('mozorg', ('mission.ftl',)), + ) + for f in files + ] + os_ = MockOS(self.root, mock_files) + _isfile.side_effect = os_.isfile + _walk.side_effect = os_.walk + local_files = [self.leaf(p).split('/', 1)[1] for p, _, _, _ in pc] + return pontoon, vendor, local_files + + def test_reference(self, _walk, _isfile): + pontoon_config, vendor_config, files = self._list(None, _walk, _isfile) + pontoon_files = ProjectFiles(None, [pontoon_config]) + vendor_files = ProjectFiles(None, [vendor_config]) + self.assertListEqual( + files, + [ + 'firefox/feature.ftl', + 'firefox/home.ftl', + 'mozorg/mission.ftl', + ] + ) + ref_path = self.path('/en/firefox/feature.ftl') + self.assertIsNotNone(pontoon_files.match(ref_path)) + self.assertIsNotNone(vendor_files.match(ref_path)) + ref_path = self.path('/en/firefox/home.ftl') + self.assertIsNotNone(pontoon_files.match(ref_path)) + self.assertIsNotNone(vendor_files.match(ref_path)) + ref_path = self.path('/en/mozorg/mission.ftl') + self.assertIsNotNone(pontoon_files.match(ref_path)) + self.assertIsNone(vendor_files.match(ref_path)) + + def test_de(self, _walk, _isfile): + # home.ftl excluded completely by configs-special-templates.toml + # firefox/* only in vendor + pontoon_config, vendor_config, files = self._list('de', _walk, _isfile) + pontoon_files = ProjectFiles('de', [pontoon_config]) + vendor_files = ProjectFiles('de', [vendor_config]) + self.assertListEqual( + files, + [ + 'firefox/feature.ftl', + # 'firefox/home.ftl', + 'mozorg/mission.ftl', + ] + ) + l10n_path = self.path('/de/firefox/feature.ftl') + ref_path = self.path('/en/firefox/feature.ftl') + self.assertEqual( + pontoon_config.filter( + File( + 
l10n_path, + 'de/firefox/feature.ftl', + locale='de' + ) + ), + 'ignore' + ) + self.assertIsNone(pontoon_files.match(l10n_path)) + self.assertIsNone(pontoon_files.match(ref_path)) + self.assertIsNotNone(vendor_files.match(l10n_path)) + self.assertIsNotNone(vendor_files.match(ref_path)) + l10n_path = self.path('/de/firefox/home.ftl') + ref_path = self.path('/en/firefox/home.ftl') + self.assertEqual( + pontoon_config.filter( + File( + l10n_path, + 'de/firefox/home.ftl', + locale='de' + ) + ), + 'ignore' + ) + self.assertIsNone(pontoon_files.match(l10n_path)) + self.assertIsNone(pontoon_files.match(ref_path)) + self.assertIsNone(vendor_files.match(l10n_path)) + self.assertIsNone(vendor_files.match(ref_path)) + l10n_path = self.path('/de/mozorg/mission.ftl') + ref_path = self.path('/en/mozorg/mission.ftl') + self.assertEqual( + pontoon_config.filter( + File( + l10n_path, + 'de/mozorg/mission.ftl', + locale='de' + ) + ), + 'error' + ) + self.assertIsNotNone(pontoon_files.match(l10n_path)) + self.assertIsNotNone(pontoon_files.match(ref_path)) + self.assertIsNone(vendor_files.match(l10n_path)) + self.assertIsNone(vendor_files.match(ref_path)) + + def test_gd(self, _walk, _isfile): + # only community localization + pontoon_config, vendor_config, files = self._list('gd', _walk, _isfile) + pontoon_files = ProjectFiles('gd', [pontoon_config]) + vendor_files = ProjectFiles('gd', [vendor_config]) + self.assertListEqual( + files, + [ + 'firefox/feature.ftl', + 'firefox/home.ftl', + 'mozorg/mission.ftl', + ] + ) + l10n_path = self.path('/gd/firefox/home.ftl') + ref_path = self.path('/en/firefox/home.ftl') + self.assertEqual( + pontoon_config.filter( + File( + l10n_path, + 'gd/firefox/home.ftl', + locale='gd' + ) + ), + 'error' + ) + self.assertIsNotNone(pontoon_files.match(l10n_path)) + self.assertIsNotNone(pontoon_files.match(ref_path)) + self.assertIsNone(vendor_files.match(l10n_path)) + self.assertIsNone(vendor_files.match(ref_path)) + + def test_it(self, _walk, _isfile): + # all pages translated, but split between vendor and community + pontoon_config, vendor_config, files = self._list('it', _walk, _isfile) + pontoon_files = ProjectFiles('it', [pontoon_config]) + vendor_files = ProjectFiles('it', [vendor_config]) + self.assertListEqual( + files, + [ + 'firefox/feature.ftl', + 'firefox/home.ftl', + 'mozorg/mission.ftl', + ] + ) + l10n_path = self.path('/it/firefox/home.ftl') + ref_path = self.path('/en/firefox/home.ftl') + file = File( + l10n_path, + 'it/firefox/home.ftl', + locale='it' + ) + self.assertEqual(pontoon_config.filter(file), 'ignore') + self.assertEqual(vendor_config.filter(file), 'error') + self.assertIsNone(pontoon_files.match(l10n_path)) + self.assertIsNone(pontoon_files.match(ref_path)) + self.assertIsNotNone(vendor_files.match(l10n_path)) + self.assertIsNotNone(vendor_files.match(ref_path)) + + +class TestL10nMerge(Rooted, unittest.TestCase): + # need to go through TOMLParser, as that's handling most of the + # environment + def test_merge_paths(self): + parser = MockTOMLParser({ + "base.toml": + '''\ +basepath = "." 
+locales = [ + "de", +] +[env] + l = "{l10n_base}/{locale}/" +[[paths]] + reference = "reference/*" + l10n = "{l}*" +'''}) + cfg = parser.parse( + self.path('/base.toml'), + env={'l10n_base': self.path('/l10n')} + ) + mocks = [ + self.path(leaf) + for leaf in [ + '/l10n/de/good.ftl', + '/l10n/de/not/subdir/bad.ftl', + '/l10n/fr/good.ftl', + '/l10n/fr/not/subdir/bad.ftl', + '/reference/ref.ftl', + '/reference/not/subdir/bad.ftl', + ] + ] + files = MockProjectFiles(mocks, 'de', [cfg], self.path('/mergers')) + self.assertListEqual( + list(files), + [ + (self.path('/l10n/de/good.ftl'), + self.path('/reference/good.ftl'), + self.path('/mergers/de/good.ftl'), + set()), + (self.path('/l10n/de/ref.ftl'), + self.path('/reference/ref.ftl'), + self.path('/mergers/de/ref.ftl'), + set()), + ]) diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/test_ini.py b/third_party/python/compare-locales/compare_locales/tests/paths/test_ini.py new file mode 100644 index 0000000000..ddb75e2b1b --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/test_ini.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest + +from . import ( + SetupMixin, +) + + +class TestConfigLegacy(SetupMixin, unittest.TestCase): + + def test_filter_py_true(self): + 'Test filter.py just return bool(True)' + def filter(mod, path, entity=None): + return True + self.cfg.set_filter_py(filter) + with self.assertRaises(AssertionError): + self.cfg.add_rules({}) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'error') + + def test_filter_py_false(self): + 'Test filter.py just return bool(False)' + def filter(mod, path, entity=None): + return False + self.cfg.set_filter_py(filter) + with self.assertRaises(AssertionError): + self.cfg.add_rules({}) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'ignore') + + def test_filter_py_error(self): + 'Test filter.py just return str("error")' + def filter(mod, path, entity=None): + return 'error' + self.cfg.set_filter_py(filter) + with self.assertRaises(AssertionError): + self.cfg.add_rules({}) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'error') + + def test_filter_py_ignore(self): + 'Test filter.py just return str("ignore")' + def filter(mod, path, entity=None): + return 'ignore' + self.cfg.set_filter_py(filter) + with self.assertRaises(AssertionError): + self.cfg.add_rules({}) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'ignore') + + def test_filter_py_report(self): + 'Test filter.py just return str("report") and match to "warning"' + def filter(mod, path, entity=None): + return 'report' + self.cfg.set_filter_py(filter) + with self.assertRaises(AssertionError): + self.cfg.add_rules({}) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'warning') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'warning') + + def test_filter_py_module(self): + 'Test filter.py to return str("error") for 
browser or "ignore"' + def filter(mod, path, entity=None): + return 'error' if mod == 'browser' else 'ignore' + self.cfg.set_filter_py(filter) + with self.assertRaises(AssertionError): + self.cfg.add_rules({}) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file, entity='one_entity') + self.assertEqual(rv, 'ignore') diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/test_matcher.py b/third_party/python/compare-locales/compare_locales/tests/paths/test_matcher.py new file mode 100644 index 0000000000..74a20a84ce --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/test_matcher.py @@ -0,0 +1,500 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import six +import unittest + +from compare_locales.paths.matcher import Matcher, ANDROID_STANDARD_MAP +from . import Rooted + + +class TestMatcher(unittest.TestCase): + + def test_matcher(self): + one = Matcher('foo/*') + self.assertTrue(one.match('foo/baz')) + self.assertFalse(one.match('foo/baz/qux')) + other = Matcher('bar/*') + self.assertTrue(other.match('bar/baz')) + self.assertFalse(other.match('bar/baz/qux')) + self.assertEqual(one.sub(other, 'foo/baz'), 'bar/baz') + self.assertIsNone(one.sub(other, 'bar/baz')) + one = Matcher('foo/**') + self.assertTrue(one.match('foo/baz')) + self.assertTrue(one.match('foo/baz/qux')) + other = Matcher('bar/**') + self.assertTrue(other.match('bar/baz')) + self.assertTrue(other.match('bar/baz/qux')) + self.assertEqual(one.sub(other, 'foo/baz'), 'bar/baz') + self.assertEqual(one.sub(other, 'foo/baz/qux'), 'bar/baz/qux') + one = Matcher('foo/*/one/**') + self.assertTrue(one.match('foo/baz/one/qux')) + self.assertFalse(one.match('foo/baz/bez/one/qux')) + other = Matcher('bar/*/other/**') + self.assertTrue(other.match('bar/baz/other/qux')) + self.assertFalse(other.match('bar/baz/bez/other/qux')) + self.assertEqual(one.sub(other, 'foo/baz/one/qux'), + 'bar/baz/other/qux') + self.assertEqual(one.sub(other, 'foo/baz/one/qux/zzz'), + 'bar/baz/other/qux/zzz') + self.assertIsNone(one.sub(other, 'foo/baz/bez/one/qux')) + one = Matcher('foo/**/bar/**') + self.assertTrue(one.match('foo/bar/baz.qux')) + self.assertTrue(one.match('foo/tender/bar/baz.qux')) + self.assertFalse(one.match('foo/nobar/baz.qux')) + self.assertFalse(one.match('foo/tender/bar')) + other = Matcher('baz/**/qux/**') + self.assertEqual(one.sub(other, 'foo/bar/baz.qux'), 'baz/qux/baz.qux') + self.assertEqual( + one.sub(other, 'foo/tender/bar/baz.qux'), + 'baz/tender/qux/baz.qux' + ) + + def test_encoded_matcher(self): + one = Matcher('foo/*', encoding='utf-8') + self.assertTrue(one.match(b'foo/bar')) + other = Matcher('bar/*', encoding='utf-8') + self.assertEqual(one.sub(other, b'foo/baz'), b'bar/baz') + + def test_prefix(self): + self.assertEqual( + Matcher('foo/bar.file').prefix, 'foo/bar.file' + ) + self.assertEqual( + Matcher('foo/*').prefix, 'foo/' + ) + self.assertEqual( + Matcher('foo/**').prefix, 'foo/' + ) + self.assertEqual( + Matcher('foo/*/bar').prefix, 'foo/' + ) + self.assertEqual( + Matcher('foo/**/bar').prefix, 'foo/' + ) + self.assertEqual( + 
Matcher('foo/**/bar/*').prefix, 'foo/' + ) + self.assertEqual( + Matcher('foo/{v}/bar').prefix, + 'foo/' + ) + self.assertEqual( + Matcher('foo/{v}/bar', {'v': 'expanded'}).prefix, + 'foo/expanded/bar' + ) + self.assertEqual( + Matcher('foo/{v}/*/bar').prefix, + 'foo/' + ) + self.assertEqual( + Matcher('foo/{v}/*/bar', {'v': 'expanded'}).prefix, + 'foo/expanded/' + ) + self.assertEqual( + Matcher('foo/{v}/*/bar', {'v': '{missing}'}).prefix, + 'foo/' + ) + + def test_encoded_prefix(self): + self.assertEqual( + Matcher('foo/bar.file', encoding='utf-8').prefix, b'foo/bar.file' + ) + self.assertEqual( + Matcher('foo/*', encoding='utf-8').prefix, b'foo/' + ) + self.assertEqual( + Matcher('foo/{v}/bar', encoding='utf-8').prefix, + b'foo/' + ) + self.assertEqual( + Matcher('foo/{v}/bar', {'v': 'expanded'}, encoding='utf-8').prefix, + b'foo/expanded/bar' + ) + + def test_variables(self): + self.assertDictEqual( + Matcher('foo/bar.file').match('foo/bar.file'), + {} + ) + self.assertDictEqual( + Matcher('{path}/bar.file').match('foo/bar.file'), + { + 'path': 'foo' + } + ) + self.assertDictEqual( + Matcher('{ path }/bar.file').match('foo/bar.file'), + { + 'path': 'foo' + } + ) + self.assertIsNone( + Matcher('{ var }/foopy/{ var }/bears') + .match('one/foopy/other/bears') + ) + self.assertDictEqual( + Matcher('{ var }/foopy/{ var }/bears') + .match('same_value/foopy/same_value/bears'), + { + 'var': 'same_value' + } + ) + self.assertIsNone( + Matcher('{ var }/foopy/bears', {'var': 'other'}) + .match('one/foopy/bears') + ) + self.assertDictEqual( + Matcher('{ var }/foopy/bears', {'var': 'one'}) + .match('one/foopy/bears'), + { + 'var': 'one' + } + ) + self.assertDictEqual( + Matcher('{one}/{two}/something', { + 'one': 'some/segment', + 'two': 'with/a/lot/of' + }).match('some/segment/with/a/lot/of/something'), + { + 'one': 'some/segment', + 'two': 'with/a/lot/of' + } + ) + self.assertDictEqual( + Matcher('{l}**', { + 'l': 'foo/{locale}/' + }).match('foo/it/path'), + { + 'l': 'foo/it/', + 'locale': 'it', + 's1': 'path', + } + ) + self.assertDictEqual( + Matcher('{l}*', { + 'l': 'foo/{locale}/' + }).match('foo/it/path'), + { + 'l': 'foo/it/', + 'locale': 'it', + 's1': 'path', + } + ) + + def test_encoded_variables(self): + self.assertDictEqual( + Matcher('foo/bar.file', encoding='utf-8').match(b'foo/bar.file'), + {} + ) + self.assertDictEqual( + Matcher( + '{path}/bar.file', encoding='utf-8' + ).match(b'foo/bar.file'), + { + 'path': 'foo' + } + ) + self.assertDictEqual( + Matcher('{l}*', { + 'l': 'foo/{locale}/' + }, encoding='utf-8').match(b'foo/it/path'), + { + 'l': 'foo/it/', + 'locale': 'it', + 's1': 'path', + } + ) + + def test_variables_sub(self): + one = Matcher('{base}/{loc}/*', {'base': 'ONE_BASE'}) + other = Matcher('{base}/somewhere/*', {'base': 'OTHER_BASE'}) + self.assertEqual( + one.sub(other, 'ONE_BASE/ab-CD/special'), + 'OTHER_BASE/somewhere/special' + ) + one = Matcher('{base}/{loc}/*', {'base': 'ONE_BASE'}, encoding='utf-8') + other = Matcher( + '{base}/somewhere/*', {'base': 'OTHER_BASE'}, encoding='utf-8' + ) + self.assertEqual( + one.sub(other, b'ONE_BASE/ab-CD/special'), + b'OTHER_BASE/somewhere/special' + ) + + def test_copy(self): + one = Matcher('{base}/{loc}/*', { + 'base': 'ONE_BASE', + 'generic': 'keep' + }) + other = Matcher(one, {'base': 'OTHER_BASE'}) + self.assertEqual( + one.sub(other, 'ONE_BASE/ab-CD/special'), + 'OTHER_BASE/ab-CD/special' + ) + self.assertDictEqual( + one.env, + { + 'base': ['ONE_BASE'], + 'generic': ['keep'] + } + ) + self.assertDictEqual( + other.env, + 
{ + 'base': ['OTHER_BASE'], + 'generic': ['keep'] + } + ) + + def test_eq(self): + self.assertEqual( + Matcher('foo'), + Matcher('foo') + ) + self.assertNotEqual( + Matcher('foo'), + Matcher('bar') + ) + self.assertEqual( + Matcher('foo', root='/bar/'), + Matcher('foo', root='/bar/') + ) + self.assertNotEqual( + Matcher('foo', root='/bar/'), + Matcher('foo', root='/baz/') + ) + self.assertNotEqual( + Matcher('foo'), + Matcher('foo', root='/bar/') + ) + self.assertEqual( + Matcher('foo', env={'one': 'two'}), + Matcher('foo', env={'one': 'two'}) + ) + self.assertEqual( + Matcher('foo'), + Matcher('foo', env={}) + ) + self.assertNotEqual( + Matcher('foo', env={'one': 'two'}), + Matcher('foo', env={'one': 'three'}) + ) + self.assertEqual( + Matcher('foo', env={'other': 'val'}), + Matcher('foo', env={'one': 'two'}) + ) + + +class ConcatTest(unittest.TestCase): + def test_plain(self): + left = Matcher('some/path/') + right = Matcher('with/file') + concatenated = left.concat(right) + self.assertEqual(str(concatenated), 'some/path/with/file') + self.assertEqual(concatenated.prefix, 'some/path/with/file') + pattern_concatenated = left.concat('with/file') + self.assertEqual(concatenated, pattern_concatenated) + + def test_stars(self): + left = Matcher('some/*/path/') + right = Matcher('with/file') + concatenated = left.concat(right) + self.assertEqual(concatenated.prefix, 'some/') + concatenated = right.concat(left) + self.assertEqual(concatenated.prefix, 'with/filesome/') + + +class TestAndroid(unittest.TestCase): + '''special case handling for `android_locale` to handle the funky + locale codes in Android apps + ''' + def test_match(self): + # test matches as well as groupdict aliasing. + one = Matcher('values-{android_locale}/strings.xml') + self.assertEqual( + one.match('values-de/strings.xml'), + { + 'android_locale': 'de', + 'locale': 'de' + } + ) + self.assertEqual( + one.match('values-de-rDE/strings.xml'), + { + 'android_locale': 'de-rDE', + 'locale': 'de-DE' + } + ) + self.assertEqual( + one.match('values-b+sr+Latn/strings.xml'), + { + 'android_locale': 'b+sr+Latn', + 'locale': 'sr-Latn' + } + ) + self.assertEqual( + one.with_env( + {'locale': 'de'} + ).match('values-de/strings.xml'), + { + 'android_locale': 'de', + 'locale': 'de' + } + ) + self.assertEqual( + one.with_env( + {'locale': 'de-DE'} + ).match('values-de-rDE/strings.xml'), + { + 'android_locale': 'de-rDE', + 'locale': 'de-DE' + } + ) + self.assertEqual( + one.with_env( + {'locale': 'sr-Latn'} + ).match('values-b+sr+Latn/strings.xml'), + { + 'android_locale': 'b+sr+Latn', + 'locale': 'sr-Latn' + } + ) + + def test_repeat(self): + self.assertEqual( + Matcher('{android_locale}/{android_locale}').match( + 'b+sr+Latn/b+sr+Latn' + ), + { + 'android_locale': 'b+sr+Latn', + 'locale': 'sr-Latn' + } + ) + self.assertEqual( + Matcher( + '{android_locale}/{android_locale}', + env={'locale': 'sr-Latn'} + ).match( + 'b+sr+Latn/b+sr+Latn' + ), + { + 'android_locale': 'b+sr+Latn', + 'locale': 'sr-Latn' + } + ) + + def test_mismatch(self): + # test failed matches + one = Matcher('values-{android_locale}/strings.xml') + self.assertIsNone( + one.with_env({'locale': 'de'}).match( + 'values-fr.xml' + ) + ) + self.assertIsNone( + one.with_env({'locale': 'de-DE'}).match( + 'values-de-DE.xml' + ) + ) + self.assertIsNone( + one.with_env({'locale': 'sr-Latn'}).match( + 'values-sr-Latn.xml' + ) + ) + self.assertIsNone( + Matcher('{android_locale}/{android_locale}').match( + 'b+sr+Latn/de-rDE' + ) + ) + + def test_prefix(self): + one = 
Matcher('values-{android_locale}/strings.xml') + self.assertEqual( + one.with_env({'locale': 'de'}).prefix, + 'values-de/strings.xml' + ) + self.assertEqual( + one.with_env({'locale': 'de-DE'}).prefix, + 'values-de-rDE/strings.xml' + ) + self.assertEqual( + one.with_env({'locale': 'sr-Latn'}).prefix, + 'values-b+sr+Latn/strings.xml' + ) + self.assertEqual( + one.prefix, + 'values-' + ) + + def test_aliases(self): + # test legacy locale code mapping + # he <-> iw, id <-> in, yi <-> ji + one = Matcher('values-{android_locale}/strings.xml') + for legacy, standard in six.iteritems(ANDROID_STANDARD_MAP): + self.assertDictEqual( + one.match('values-{}/strings.xml'.format(legacy)), + { + 'android_locale': legacy, + 'locale': standard + } + ) + self.assertEqual( + one.with_env({'locale': standard}).prefix, + 'values-{}/strings.xml'.format(legacy) + ) + + +class TestRootedMatcher(Rooted, unittest.TestCase): + def test_root_path(self): + one = Matcher('some/path', root=self.root) + self.assertIsNone(one.match('some/path')) + self.assertIsNotNone(one.match(self.path('/some/path'))) + + def test_copy(self): + one = Matcher('some/path', root=self.path('/one-root')) + other = Matcher(one, root=self.path('/different-root')) + self.assertIsNone(other.match('some/path')) + self.assertIsNone( + other.match(self.path('/one-root/some/path')) + ) + self.assertIsNotNone( + other.match(self.path('/different-root/some/path')) + ) + + def test_rooted(self): + r1 = self.path('/one-root') + r2 = self.path('/other-root') + one = Matcher(self.path('/one-root/full/path'), root=r2) + self.assertIsNone(one.match(self.path('/other-root/full/path'))) + # concat r2 and r1. r1 is absolute, so we gotta trick that + concat_root = r2 + if not r1.startswith('/'): + # windows absolute paths don't start with '/', add one + concat_root += '/' + concat_root += r1 + self.assertIsNone(one.match(concat_root + '/full/path')) + self.assertIsNotNone(one.match(self.path('/one-root/full/path'))) + + def test_variable(self): + r1 = self.path('/one-root') + r2 = self.path('/other-root') + one = Matcher( + '{var}/path', + env={'var': 'relative-dir'}, + root=r1 + ) + self.assertIsNone(one.match('relative-dir/path')) + self.assertIsNotNone( + one.match(self.path('/one-root/relative-dir/path')) + ) + other = Matcher(one, env={'var': r2}) + self.assertIsNone( + other.match(self.path('/one-root/relative-dir/path')) + ) + self.assertIsNotNone( + other.match(self.path('/other-root/path')) + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/test_paths.py b/third_party/python/compare-locales/compare_locales/tests/paths/test_paths.py new file mode 100644 index 0000000000..e72fe9a7a6 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/test_paths.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import +import unittest + +from compare_locales.paths import File + + +class TestFile(unittest.TestCase): + def test_hash_and_equality(self): + f1 = File('/tmp/full/path/to/file', 'path/to/file') + d = {} + d[f1] = True + self.assertIn(f1, d) + f2 = File('/tmp/full/path/to/file', 'path/to/file') + self.assertIn(f2, d) + f2 = File('/tmp/full/path/to/file', 'path/to/file', locale='en') + self.assertNotIn(f2, d) + # trigger hash collisions between File and non-File objects + self.assertEqual(hash(f1), hash(f1.localpath)) + self.assertNotIn(f1.localpath, d) + f1 = File('/tmp/full/other/path', 'other/path') + d[f1.localpath] = True + self.assertIn(f1.localpath, d) + self.assertNotIn(f1, d) diff --git a/third_party/python/compare-locales/compare_locales/tests/paths/test_project.py b/third_party/python/compare-locales/compare_locales/tests/paths/test_project.py new file mode 100644 index 0000000000..fe12245486 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/paths/test_project.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest + +from compare_locales.paths import ProjectConfig +from . import SetupMixin + + +class TestConfigRules(SetupMixin, unittest.TestCase): + + def test_filter_empty(self): + 'Test that an empty config works' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/browser/**' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.file, entity='one_entity') + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file, entity='one_entity') + self.assertEqual(rv, 'ignore') + + def test_single_file_rule(self): + 'Test a single rule for just a single file, no key' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/browser/**' + }) + self.cfg.add_rules({ + 'path': '/tmp/somedir/{locale}/browser/one/two/file.ftl', + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.file, 'one_entity') + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file, 'one_entity') + self.assertEqual(rv, 'ignore') + + def test_single_key_rule(self): + 'Test a single rule with file and key' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/browser/**' + }) + self.cfg.add_rules({ + 'path': '/tmp/somedir/{locale}/browser/one/two/file.ftl', + 'key': 'one_entity', + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.file, 'one_entity') + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file, 'one_entity') + self.assertEqual(rv, 'ignore') + + def test_single_non_matching_key_rule(self): + 'Test a single key rule with regex special chars that should not match' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + self.cfg.add_rules({ + 'path': '/tmp/somedir/{locale}/browser/one/two/file.ftl', + 'key': '.ne_entit.', + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file, 'one_entity') + self.assertEqual(rv, 'error') + + def 
test_single_matching_re_key_rule(self): + 'Test a single key with regular expression' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + self.cfg.add_rules({ + 'path': '/tmp/somedir/{locale}/browser/one/two/file.ftl', + 'key': 're:.ne_entit.$', + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file, 'one_entity') + self.assertEqual(rv, 'ignore') + + def test_double_file_rule(self): + 'Test path shortcut, one for each of our files' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + self.cfg.add_rules({ + 'path': [ + '/tmp/somedir/{locale}/browser/one/two/file.ftl', + '/tmp/somedir/{locale}/toolkit/two/one/file.ftl', + ], + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + + def test_double_file_key_rule(self): + 'Test path and key shortcut, one key matching, one not' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + self.cfg.add_rules({ + 'path': [ + '/tmp/somedir/{locale}/browser/one/two/file.ftl', + '/tmp/somedir/{locale}/toolkit/two/one/file.ftl', + ], + 'key': [ + 'one_entity', + 'other_entity', + ], + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.file, 'one_entity') + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'error') + rv = self.cfg.filter(self.other_file, 'one_entity') + self.assertEqual(rv, 'ignore') + + def test_single_wildcard_rule(self): + 'Test single wildcard' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/browser/**' + }) + self.cfg.add_rules({ + 'path': [ + '/tmp/somedir/{locale}/browser/one/*/*', + ], + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + + def test_double_wildcard_rule(self): + 'Test double wildcard' + self.cfg.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + self.cfg.add_rules({ + 'path': [ + '/tmp/somedir/{locale}/**', + ], + 'action': 'ignore' + }) + rv = self.cfg.filter(self.file) + self.assertEqual(rv, 'ignore') + rv = self.cfg.filter(self.other_file) + self.assertEqual(rv, 'ignore') + + +class TestProjectConfig(unittest.TestCase): + def test_children(self): + pc = ProjectConfig(None) + child = ProjectConfig(None) + pc.add_child(child) + self.assertListEqual([pc, child], list(pc.configs)) + + def test_locales_in_children(self): + pc = ProjectConfig(None) + child = ProjectConfig(None) + child.add_paths({ + 'l10n': '/tmp/somedir/{locale}/toolkit/**', + }) + child.set_locales([]) + pc.add_child(child) + self.assertListEqual(pc.all_locales, []) + pc.set_locales(['de', 'fr']) + self.assertListEqual(child.locales, []) + self.assertListEqual(pc.all_locales, ['de', 'fr']) + + def test_locales_in_paths(self): + pc = ProjectConfig(None) + child = ProjectConfig(None) + child.add_paths({ + 'l10n': '/tmp/somedir/{locale}/toolkit/**', + 'locales': ['it'] + }) + child.set_locales([]) + pc.add_child(child) + self.assertListEqual(pc.all_locales, ['it']) + pc.set_locales(['de', 'fr']) + self.assertListEqual(pc.all_locales, ['de', 'fr', 'it']) + + +class TestSameConfig(unittest.TestCase): + + def test_path(self): + one = ProjectConfig('one.toml') + one.set_locales(['ab']) + self.assertTrue(one.same(ProjectConfig('one.toml'))) + self.assertFalse(one.same(ProjectConfig('two.toml'))) + + def test_paths(self): + one = ProjectConfig('one.toml') + 
one.set_locales(['ab']) + one.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + other = ProjectConfig('one.toml') + self.assertFalse(one.same(other)) + other.add_paths({ + 'l10n': '/tmp/somedir/{locale}/**' + }) + self.assertTrue(one.same(other)) + + def test_children(self): + one = ProjectConfig('one.toml') + one.add_child(ProjectConfig('inner.toml')) + one.set_locales(['ab']) + other = ProjectConfig('one.toml') + self.assertFalse(one.same(other)) + other.add_child(ProjectConfig('inner.toml')) + self.assertTrue(one.same(other)) diff --git a/third_party/python/compare-locales/compare_locales/tests/po/__init__.py b/third_party/python/compare-locales/compare_locales/tests/po/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/po/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/po/test_parser.py b/third_party/python/compare-locales/compare_locales/tests/po/test_parser.py new file mode 100644 index 0000000000..e02fe66283 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/po/test_parser.py @@ -0,0 +1,139 @@ +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest + +from compare_locales.tests import ParserTestMixin +from compare_locales.parser import ( + BadEntity, + Whitespace, +) + + +class TestPoParser(ParserTestMixin, unittest.TestCase): + maxDiff = None + filename = 'strings.po' + + def test_parse_string_list(self): + self.parser.readUnicode(' ') + ctx = self.parser.ctx + with self.assertRaises(BadEntity): + self.parser._parse_string_list(ctx, 0, 'msgctxt') + self.parser.readUnicode('msgctxt ') + ctx = self.parser.ctx + with self.assertRaises(BadEntity): + self.parser._parse_string_list(ctx, 0, 'msgctxt') + self.parser.readUnicode('msgctxt " "') + ctx = self.parser.ctx + self.assertTupleEqual( + self.parser._parse_string_list(ctx, 0, 'msgctxt'), + (" ", len(ctx.contents)) + ) + self.parser.readUnicode('msgctxt " " \t "A"\r "B"asdf') + ctx = self.parser.ctx + self.assertTupleEqual( + self.parser._parse_string_list(ctx, 0, 'msgctxt'), + (" AB", len(ctx.contents)-4) + ) + self.parser.readUnicode('msgctxt "\\\\ " "A" "B"asdf"fo"') + ctx = self.parser.ctx + self.assertTupleEqual( + self.parser._parse_string_list(ctx, 0, 'msgctxt'), + ("\\ AB", len(ctx.contents)-8) + ) + + def test_simple_string(self): + source = ''' +msgid "untranslated string" +msgstr "translated string" +''' + self._test( + source, + ( + (Whitespace, '\n'), + (('untranslated string', None), 'translated string'), + (Whitespace, '\n'), + ) + ) + + def test_escapes(self): + source = r''' +msgid "untranslated string" +msgstr "\\\t\r\n\"" +''' + self._test( + source, + ( + (Whitespace, '\n'), + (('untranslated string', None), '\\\t\r\n"'), + (Whitespace, '\n'), + ) + ) + + def test_comments(self): + source = ''' +# translator-comments +#. extracted-comments +#: reference... +#, flag... 
+#| msgctxt previous-context +#| msgid previous-untranslated-string +msgid "untranslated string" +msgstr "translated string" +''' + self._test( + source, + ( + (Whitespace, '\n'), + ( + ('untranslated string', None), + 'translated string', + 'extracted-comments', + ), + (Whitespace, '\n'), + ) + ) + + def test_simple_context(self): + source = ''' +msgctxt "context to use" +msgid "untranslated string" +msgstr "translated string" +''' + self._test( + source, + ( + (Whitespace, '\n'), + ( + ('untranslated string', 'context to use'), + 'translated string' + ), + (Whitespace, '\n'), + ) + ) + + def test_translated(self): + source = ''' +msgid "reference 1" +msgstr "translated string" + +msgid "reference 2" +msgstr "" +''' + self._test( + source, + ( + (Whitespace, '\n'), + (('reference 1', None), 'translated string'), + (Whitespace, '\n'), + (('reference 2', None), 'reference 2'), + (Whitespace, '\n'), + ) + ) + entities = self.parser.parse() + self.assertListEqual( + [e.localized for e in entities], + [True, False] + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/properties/__init__.py b/third_party/python/compare-locales/compare_locales/tests/properties/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/properties/__init__.py diff --git a/third_party/python/compare-locales/compare_locales/tests/properties/test_checks.py b/third_party/python/compare-locales/compare_locales/tests/properties/test_checks.py new file mode 100644 index 0000000000..68a8e0fd8c --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/properties/test_checks.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +from compare_locales.paths import File +from compare_locales.tests import BaseHelper + + +class TestProperties(BaseHelper): + file = File('foo.properties', 'foo.properties') + refContent = b'''some = value +''' + + def testGood(self): + self._test(b'''some = localized''', + tuple()) + + def testMissedEscape(self): + self._test(br'''some = \u67ood escape, bad \escape''', + (('warning', 20, r'unknown escape sequence, \e', + 'escape'),)) + + def test_bad_encoding(self): + self._test( + 'some = touché"'.encode('latin-1'), + ( + ( + "warning", + 12, + "\ufffd in: some", + "encodings" + ), + ) + ) + + +class TestPlurals(BaseHelper): + file = File('foo.properties', 'foo.properties') + refContent = b'''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - #2 +''' + + def testGood(self): + self._test(b'''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 filers +''', + tuple()) + + def testNotUsed(self): + self._test(b'''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. 
+# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - Downloads;#1 filers +''', + (('warning', 0, 'not all variables used in l10n', + 'plural'),)) + + def testNotDefined(self): + self._test(b'''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 #3 +''', + (('error', 0, 'unreplaced variables in l10n', 'plural'),)) + + +class TestPluralForms(BaseHelper): + file = File('foo.properties', 'foo.properties', locale='en-GB') + refContent = b'''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file;#1 files +''' + + def test_matching_forms(self): + self._test(b'''\ +downloadsTitleFiles=#1 fiiilee;#1 fiiilees +''', + tuple()) + + def test_lacking_forms(self): + self._test(b'''\ +downloadsTitleFiles=#1 fiiilee +''', + (('warning', 0, 'expecting 2 plurals, found 1', 'plural'),)) + + def test_excess_forms(self): + self._test(b'''\ +downloadsTitleFiles=#1 fiiilee;#1 fiiilees;#1 fiiilees +''', + (('warning', 0, 'expecting 2 plurals, found 3', 'plural'),)) diff --git a/third_party/python/compare-locales/compare_locales/tests/properties/test_merge.py b/third_party/python/compare-locales/compare_locales/tests/properties/test_merge.py new file mode 100644 index 0000000000..97d98f5167 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/properties/test_merge.py @@ -0,0 +1,68 @@ +# coding=utf8 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from codecs import decode, encode +import unittest + +from compare_locales.merge import merge_channels + + +class TestMergeProperties(unittest.TestCase): + name = "foo.properties" + + def test_no_changes(self): + channels = (b""" +foo = Foo 1 +""", b""" +foo = Foo 2 +""") + self.assertEqual( + merge_channels(self.name, channels), b""" +foo = Foo 1 +""") + + def test_encoding(self): + channels = (encode(u""" +foo = Foo 1… +""", "utf8"), encode(u""" +foo = Foo 2… +""", "utf8")) + output = merge_channels(self.name, channels) + self.assertEqual(output, encode(u""" +foo = Foo 1… +""", "utf8")) + + u_output = decode(output, "utf8") + self.assertEqual(u_output, u""" +foo = Foo 1… +""") + + def test_repetitive(self): + channels = (b"""\ +# comment +one = one +# comment +three = three +""", b"""\ +# comment +one = one +# comment +two = two +# comment +three = three +""") + output = merge_channels(self.name, channels) + self.assertMultiLineEqual( + decode(output, "utf-8"), + """\ +# comment +one = one +# comment +two = two +# comment +three = three +""" + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/properties/test_parser.py b/third_party/python/compare-locales/compare_locales/tests/properties/test_parser.py new file mode 100644 index 0000000000..7600baa753 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/properties/test_parser.py @@ -0,0 +1,243 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest + +from six.moves import zip +from compare_locales.tests import ParserTestMixin +from compare_locales.parser import ( + Comment, + Junk, + Whitespace, +) + + +class TestPropertiesParser(ParserTestMixin, unittest.TestCase): + + filename = 'foo.properties' + + def testBackslashes(self): + self._test(r'''one_line = This is one line +two_line = This is the first \ +of two lines +one_line_trailing = This line ends in \\ +and has junk +two_lines_triple = This line is one of two and ends in \\\ +and still has another line coming +''', ( + ('one_line', 'This is one line'), + (Whitespace, '\n'), + ('two_line', u'This is the first of two lines'), + (Whitespace, '\n'), + ('one_line_trailing', u'This line ends in \\'), + (Whitespace, '\n'), + (Junk, 'and has junk\n'), + ('two_lines_triple', 'This line is one of two and ends in \\' + 'and still has another line coming'), + (Whitespace, '\n'))) + + def testProperties(self): + # port of netwerk/test/PropertiesTest.cpp + self.parser.readContents(self.resource('test.properties')) + ref = ['1', '2', '3', '4', '5', '6', '7', '8', + 'this is the first part of a continued line ' + 'and here is the 2nd part'] + i = iter(self.parser) + for r, e in zip(ref, i): + self.assertTrue(e.localized) + self.assertEqual(e.val, r) + + def test_bug121341(self): + # port of xpcom/tests/unit/test_bug121341.js + self.parser.readContents(self.resource('bug121341.properties')) + ref = ['abc', 'xy', u"\u1234\t\r\n\u00AB\u0001\n", + "this is multiline property", + "this is another multiline property", u"test\u0036", + "yet another multiline propery", u"\ttest5\u0020", " test6\t", + u"c\uCDEFd", u"\uABCD"] + i = iter(self.parser) + for r, e in zip(ref, i): + self.assertEqual(e.val, r) + + def test_comment_in_multi(self): + self._test(r'''bar=one line with a \ +# part 
that looks like a comment \ +and an end''', (('bar', 'one line with a # part that looks like a comment ' + 'and an end'),)) + + def test_license_header(self): + self._test('''\ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +foo=value +''', ( + (Comment, 'MPL'), + (Whitespace, '\n\n'), + ('foo', 'value'), + (Whitespace, '\n'))) + + def test_escapes(self): + self.parser.readContents(br''' +# unicode escapes +zero = some \unicode +one = \u0 +two = \u41 +three = \u042 +four = \u0043 +five = \u0044a +six = \a +seven = \n\r\t\\ +''') + ref = ['some unicode', chr(0), 'A', 'B', 'C', 'Da', 'a', '\n\r\t\\'] + for r, e in zip(ref, self.parser): + self.assertEqual(e.val, r) + + def test_trailing_comment(self): + self._test('''first = string +second = string + +# +#commented out +''', ( + ('first', 'string'), + (Whitespace, '\n'), + ('second', 'string'), + (Whitespace, '\n\n'), + (Comment, 'commented out'), + (Whitespace, '\n'))) + + def test_trailing_newlines(self): + self._test('''\ +foo = bar + +\x20\x20 + ''', (('foo', 'bar'), (Whitespace, '\n\n\x20\x20\n '))) + + def test_just_comments(self): + self._test('''\ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# LOCALIZATION NOTE These strings are used inside the Promise debugger +# which is available as a panel in the Debugger. +''', ( + (Comment, 'MPL'), + (Whitespace, '\n\n'), + (Comment, 'LOCALIZATION NOTE'), + (Whitespace, '\n'))) + + def test_just_comments_without_trailing_newline(self): + self._test('''\ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# LOCALIZATION NOTE These strings are used inside the Promise debugger +# which is available as a panel in the Debugger.''', ( + (Comment, 'MPL'), + (Whitespace, '\n\n'), + (Comment, 'LOCALIZATION NOTE'))) + + def test_trailing_comment_and_newlines(self): + self._test('''\ +# LOCALIZATION NOTE These strings are used inside the Promise debugger +# which is available as a panel in the Debugger. + + + +''', ( + (Comment, 'LOCALIZATION NOTE'), + (Whitespace, '\n\n\n'))) + + def test_standalone_license(self): + self._test('''\ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+foo = value +''', ( + (Comment, 'MPL'), + (Whitespace, '\n'), + ('foo', 'value'), + (Whitespace, '\n'))) + + def test_empty_file(self): + self._test('', tuple()) + self._test('\n', ((Whitespace, '\n'),)) + self._test('\n\n', ((Whitespace, '\n\n'),)) + self._test(' \n\n', ((Whitespace, '\n\n'),)) + + def test_positions(self): + self.parser.readContents(b'''\ +one = value +two = other \\ +escaped value +''') + one, two = list(self.parser) + self.assertEqual(one.position(), (1, 1)) + self.assertEqual(one.value_position(), (1, 7)) + self.assertEqual(two.position(), (2, 1)) + self.assertEqual(two.value_position(), (2, 7)) + self.assertEqual(two.value_position(-1), (3, 14)) + self.assertEqual(two.value_position(10), (3, 3)) + + # Bug 1399059 comment 18 + def test_z(self): + self.parser.readContents(b'''\ +one = XYZ ABC +''') + one, = list(self.parser) + self.assertEqual(one.val, 'XYZ ABC') + + def test_white_space_stripping(self): + self._test('''\ +one = one +two = two \n\ +three = three\xa0''', ( + ('one', 'one'), + (Whitespace, '\n'), + ('two', 'two'), + (Whitespace, '\n'), + ('three', 'three\xa0'), + )) + + def test_white_space_keys(self): + self._test('''\ +o\\ e = one +t\fo = two \n\ +t\xa0e = three\xa0''', ( + ('o\\ e', 'one'), + (Whitespace, '\n'), + ('t\fo', 'two'), + (Whitespace, '\n'), + ('t\xa0e', 'three\xa0'), + )) + + def test_pre_comment(self): + self._test('''\ +# comment +one = string + +# standalone + +# glued +second = string +''', ( + ('one', 'string', 'comment'), + (Whitespace, '\n\n'), + (Comment, 'standalone'), + (Whitespace, '\n\n'), + ('second', 'string', 'glued'), + (Whitespace, '\n'), + )) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/python/compare-locales/compare_locales/tests/serializer/__init__.py b/third_party/python/compare-locales/compare_locales/tests/serializer/__init__.py new file mode 100644 index 0000000000..9b85098b23 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/serializer/__init__.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from compare_locales.parser import getParser +from compare_locales.serializer import serialize + + +class Helper(object): + """Mixin to test serializers. + + Reads the reference_content into self.reference, and uses + that to serialize in _test. + """ + name = None + reference_content = None + + def setUp(self): + p = self.parser = getParser(self.name) + p.readUnicode(self.reference_content) + self.reference = list(p.walk()) + + def _test(self, old_content, new_data, expected): + """Test with old content, new data, and the reference data + against the expected unicode output. + """ + self.parser.readUnicode(old_content) + old_l10n = list(self.parser.walk()) + output = serialize(self.name, self.reference, old_l10n, new_data) + self.assertMultiLineEqual( + output.decode(self.parser.encoding), + expected + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/serializer/test_android.py b/third_party/python/compare-locales/compare_locales/tests/serializer/test_android.py new file mode 100644 index 0000000000..b36f605e87 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/serializer/test_android.py @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest + +from . import Helper + + +class TestAndroidSerializer(Helper, unittest.TestCase): + name = 'strings.xml' + reference_content = """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- The page html title (i.e. the <title> tag content) --> + <string name="title">Unable to connect</string> + <string name="message"><![CDATA[ + <ul> + <li>The site could be temporarily unavailable or too busy.</li> + </ul> + ]]></string> + <string name="wrapped_message"> + <![CDATA[ + <ul> + <li>The site could be temporarily unavailable or too busy.</li> + </ul> + ]]> + </string> +</resources> +""" + + def test_nothing_new_or_old(self): + self._test( + "", + {}, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + </resources> +""" + ) + + def test_new_string(self): + self._test( + "", + { + "title": "Cannot connect" + }, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- The page html title (i.e. the <title> tag content) --> + <string name="title">Cannot connect</string> + </resources> +""" + ) + + def test_new_cdata(self): + self._test( + "", + { + "message": """ +<ul> + <li>Something else</li> +</ul> +""" + }, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="message"><![CDATA[ +<ul> + <li>Something else</li> +</ul> +]]></string> + </resources> +""" + ) + + def test_new_cdata_wrapped(self): + self._test( + "", + { + "wrapped_message": """ +<ul> + <li>Something else</li> +</ul> +""" + }, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="wrapped_message"> + <![CDATA[ +<ul> + <li>Something else</li> +</ul> +]]> + </string> +</resources> +""" + ) + + def test_remove_string(self): + self._test( + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="first_old_title">Unable to connect</string> + <string name="title">Unable to connect</string> + <string name="last_old_title">Unable to connect</string> +</resources> +""", + {}, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="title">Unable to connect</string> + </resources> +""" + ) + + def test_same_string(self): + self._test( + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="title">Unable to connect</string> +</resources> +""", + { + "title": "Unable to connect" + }, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <!-- The page html title (i.e. the <title> tag content) --> + <string name="title">Unable to connect</string> + </resources> +""" + ) + + +class TestAndroidDuplicateComment(Helper, unittest.TestCase): + name = 'strings.xml' + reference_content = """\ +<?xml version="1.0" encoding="utf-8"?> +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. 
--> +<resources> + <!-- Label used in the contextmenu shown when long-pressing on a link --> + <string name="contextmenu_open_in_app">Open with app</string> + <!-- Label used in the contextmenu shown when long-pressing on a link --> + <string name="contextmenu_link_share">Share link</string> +</resources> +""" + + def test_missing_translation(self): + self._test( + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + + <!-- Label used in the contextmenu shown when long-pressing on a link --> + <!-- Label used in the contextmenu shown when long-pressing on a link --> + <string name="contextmenu_link_share"/> + </resources> +""", + { + "contextmenu_link_share": "translation" + }, + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + + <!-- Label used in the contextmenu shown when long-pressing on a link --> + <string name="contextmenu_link_share">translation</string> + </resources> +""" + ) + + +class TestAndroidTools(Helper, unittest.TestCase): + name = 'strings.xml' + reference_content = ( + """\ +<resources xmlns:tools="http://schemas.android.com/tools"> + <string name="app_tagline">Take your passwords everywhere.</string> + <string name="search_your_entries" tools:ignore="ExtraTranslation">""" + "search your entries" + """</string> +</resources> +""") + + def test_namespaced_document(self): + self._test( + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="app_tagline">Localized tag line</string> + </resources> +""", + { + "search_your_entries": "Looking for Entries" + }, + ( + """\ +<?xml version="1.0" encoding="utf-8"?> +<resources xmlns:tools="http://schemas.android.com/tools"> + <string name="app_tagline">Localized tag line</string> + <string name="search_your_entries" tools:ignore="ExtraTranslation">""" + "Looking for Entries" + """</string> +</resources> +""") + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/serializer/test_fluent.py b/third_party/python/compare-locales/compare_locales/tests/serializer/test_fluent.py new file mode 100644 index 0000000000..9aa9acd4f7 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/serializer/test_fluent.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest + +from compare_locales.serializer import serialize +from . 
import Helper + + +class TestFluentSerializer(Helper, unittest.TestCase): + name = "foo.ftl" + reference_content = """\ +this = is English + +# another one bites +another = message +""" + + def test_nothing_new_or_old(self): + output = serialize(self.name, self.reference, [], {}) + self.assertMultiLineEqual(output.decode(self.parser.encoding), '\n\n') + + def test_obsolete_old_string(self): + self._test( + """\ +# we used to have this +old = stuff with comment +""", + {}, + """\ + + +""") + + def test_nothing_old_new_translation(self): + self._test( + "", + { + "another": "another = localized message" + }, + """\ + + +# another one bites +another = localized message +""" + ) + + def test_old_message_new_other_translation(self): + self._test( + """\ +this = is localized +""", + { + "another": "another = localized message" + }, + """\ +this = is localized + +# another one bites +another = localized message +""" + ) + + def test_old_message_new_same_translation(self): + self._test( + """\ +this = is localized +""", + { + "this": "this = has a better message" + }, + """\ +this = has a better message + +""" + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/serializer/test_properties.py b/third_party/python/compare-locales/compare_locales/tests/serializer/test_properties.py new file mode 100644 index 0000000000..50929fce73 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/serializer/test_properties.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest + +from compare_locales.serializer import serialize +from . 
import Helper + + +class TestPropertiesSerializer(Helper, unittest.TestCase): + name = 'foo.properties' + reference_content = """\ +this = is English + +# another one bites +another = message +""" + + def test_nothing_new_or_old(self): + output = serialize(self.name, self.reference, [], {}) + self.assertMultiLineEqual(output.decode(self.parser.encoding), '\n\n') + + def test_obsolete_old_string(self): + self._test( + """\ +# we used to have this +old = stuff with comment +""", + {}, + """\ + + +""") + + def test_nothing_old_new_translation(self): + self._test( + "", + { + "another": "localized message" + }, + """\ + + +# another one bites +another = localized message +""" + ) + + def test_old_message_new_other_translation(self): + self._test( + """\ +this = is localized +""", + { + "another": "localized message" + }, + """\ +this = is localized + +# another one bites +another = localized message +""" + ) + + def test_old_message_new_same_translation(self): + self._test( + """\ +this = is localized +""", + { + "this": "has a better message" + }, + """\ +this = has a better message + +""" + ) + + +class TestPropertiesDuplicateComment(Helper, unittest.TestCase): + name = 'foo.properties' + reference_content = """\ +# repetitive +one = one +# repetitive +two = two +""" + + def test_missing_translation(self): + self._test( + """\ +# repetitive + +# repetitive +two = two +""", + {}, + """\ +# repetitive + +# repetitive +two = two +""" + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/test_apps.py b/third_party/python/compare-locales/compare_locales/tests/test_apps.py new file mode 100644 index 0000000000..3fc5091fe5 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_apps.py @@ -0,0 +1,168 @@ +from __future__ import absolute_import +import unittest +import os +import tempfile +import shutil + +from compare_locales import mozpath +from compare_locales.paths import ( + EnumerateApp, + EnumerateSourceTreeApp, + ProjectFiles, +) + +MAIL_INI = '''\ +[general] +depth = ../.. +all = mail/locales/all-locales + +[compare] +dirs = mail + +[includes] +# non-central apps might want to use %(topsrcdir)s here, or other vars +# RFE: that needs to be supported by compare-locales, too, though +toolkit = mozilla/toolkit/locales/l10n.ini + +[include_toolkit] +type = hg +mozilla = mozilla-central +repo = http://hg.mozilla.org/ +l10n.ini = toolkit/locales/l10n.ini +''' + + +MAIL_ALL_LOCALES = '''af +de +fr +''' + +MAIL_FILTER_PY = ''' +def test(mod, path, entity = None): + if mod == 'toolkit' and path == 'ignored_path': + return 'ignore' + return 'error' +''' + +TOOLKIT_INI = '''[general] +depth = ../.. 
+ +[compare] +dirs = toolkit +''' + + +class TestApp(unittest.TestCase): + def setUp(self): + self.stage = tempfile.mkdtemp() + mail = mozpath.join(self.stage, 'comm', 'mail', 'locales') + toolkit = mozpath.join( + self.stage, 'comm', 'mozilla', 'toolkit', 'locales') + l10n = mozpath.join(self.stage, 'l10n-central', 'de', 'toolkit') + os.makedirs(mozpath.join(mail, 'en-US')) + os.makedirs(mozpath.join(toolkit, 'en-US')) + os.makedirs(l10n) + with open(mozpath.join(mail, 'l10n.ini'), 'w') as f: + f.write(MAIL_INI) + with open(mozpath.join(mail, 'all-locales'), 'w') as f: + f.write(MAIL_ALL_LOCALES) + with open(mozpath.join(mail, 'filter.py'), 'w') as f: + f.write(MAIL_FILTER_PY) + with open(mozpath.join(toolkit, 'l10n.ini'), 'w') as f: + f.write(TOOLKIT_INI) + with open(mozpath.join(mail, 'en-US', 'mail.ftl'), 'w') as f: + f.write('') + with open(mozpath.join(toolkit, 'en-US', 'platform.ftl'), 'w') as f: + f.write('') + with open(mozpath.join(l10n, 'localized.ftl'), 'w') as f: + f.write('') + + def tearDown(self): + shutil.rmtree(self.stage) + + def test_app(self): + 'Test parsing a App' + app = EnumerateApp( + mozpath.join(self.stage, 'comm', 'mail', 'locales', 'l10n.ini'), + mozpath.join(self.stage, 'l10n-central')) + self.assertListEqual(app.config.allLocales(), ['af', 'de', 'fr']) + self.assertEqual(len(app.config.children), 1) + projectconfig = app.asConfig() + self.assertListEqual(projectconfig.locales, ['af', 'de', 'fr']) + files = ProjectFiles('de', [projectconfig]) + files = list(files) + self.assertEqual(len(files), 3) + + l10nfile, reffile, mergefile, test = files[0] + self.assertListEqual(mozpath.split(l10nfile)[-3:], + ['de', 'mail', 'mail.ftl']) + self.assertListEqual(mozpath.split(reffile)[-4:], + ['mail', 'locales', 'en-US', 'mail.ftl']) + self.assertIsNone(mergefile) + self.assertSetEqual(test, set()) + + l10nfile, reffile, mergefile, test = files[1] + self.assertListEqual(mozpath.split(l10nfile)[-3:], + ['de', 'toolkit', 'localized.ftl']) + self.assertListEqual( + mozpath.split(reffile)[-6:], + ['comm', 'mozilla', 'toolkit', + 'locales', 'en-US', 'localized.ftl']) + self.assertIsNone(mergefile) + self.assertSetEqual(test, set()) + + l10nfile, reffile, mergefile, test = files[2] + self.assertListEqual(mozpath.split(l10nfile)[-3:], + ['de', 'toolkit', 'platform.ftl']) + self.assertListEqual( + mozpath.split(reffile)[-6:], + ['comm', 'mozilla', 'toolkit', 'locales', 'en-US', 'platform.ftl']) + self.assertIsNone(mergefile) + self.assertSetEqual(test, set()) + + def test_src_app(self): + 'Test parsing a App in source setup' + # move toolkit to toplevel + shutil.move(mozpath.join(self.stage, 'comm', 'mozilla'), self.stage) + app = EnumerateSourceTreeApp( + mozpath.join(self.stage, 'comm', 'mail', 'locales', 'l10n.ini'), + self.stage, + mozpath.join(self.stage, 'l10n-central'), + { + 'mozilla-central': mozpath.join(self.stage, 'mozilla') + } + ) + self.assertListEqual(app.config.allLocales(), ['af', 'de', 'fr']) + self.assertEqual(len(app.config.children), 1) + projectconfig = app.asConfig() + self.assertListEqual(projectconfig.locales, ['af', 'de', 'fr']) + files = ProjectFiles('de', [projectconfig]) + files = list(files) + self.assertEqual(len(files), 3) + + l10nfile, reffile, mergefile, test = files[0] + self.assertListEqual(mozpath.split(l10nfile)[-3:], + ['de', 'mail', 'mail.ftl']) + self.assertListEqual(mozpath.split(reffile)[-4:], + ['mail', 'locales', 'en-US', 'mail.ftl']) + self.assertIsNone(mergefile) + self.assertSetEqual(test, set()) + + l10nfile, reffile, mergefile, 
test = files[1] + self.assertListEqual(mozpath.split(l10nfile)[-3:], + ['de', 'toolkit', 'localized.ftl']) + self.assertListEqual( + mozpath.split(reffile)[-5:], + ['mozilla', 'toolkit', + 'locales', 'en-US', 'localized.ftl']) + self.assertIsNone(mergefile) + self.assertSetEqual(test, set()) + + l10nfile, reffile, mergefile, test = files[2] + self.assertListEqual(mozpath.split(l10nfile)[-3:], + ['de', 'toolkit', 'platform.ftl']) + self.assertListEqual( + mozpath.split(reffile)[-5:], + ['mozilla', 'toolkit', 'locales', 'en-US', 'platform.ftl']) + self.assertIsNone(mergefile) + self.assertSetEqual(test, set()) diff --git a/third_party/python/compare-locales/compare_locales/tests/test_checks.py b/third_party/python/compare-locales/compare_locales/tests/test_checks.py new file mode 100644 index 0000000000..193ac60c6b --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_checks.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest + +from compare_locales.checks.base import CSSCheckMixin + + +class CSSParserTest(unittest.TestCase): + def setUp(self): + self.mixin = CSSCheckMixin() + + def test_other(self): + refMap, errors = self.mixin.parse_css_spec('foo') + self.assertIsNone(refMap) + self.assertIsNone(errors) + + def test_css_specs(self): + for prop in ( + 'min-width', 'width', 'max-width', + 'min-height', 'height', 'max-height', + ): + refMap, errors = self.mixin.parse_css_spec('{}:1px;'.format(prop)) + self.assertDictEqual( + refMap, {prop: 'px'} + ) + self.assertIsNone(errors) + + def test_single_whitespace(self): + refMap, errors = self.mixin.parse_css_spec('width:15px;') + self.assertDictEqual( + refMap, {'width': 'px'} + ) + self.assertIsNone(errors) + refMap, errors = self.mixin.parse_css_spec('width : \t 15px ; ') + self.assertDictEqual( + refMap, {'width': 'px'} + ) + self.assertIsNone(errors) + refMap, errors = self.mixin.parse_css_spec('width: 15px') + self.assertDictEqual( + refMap, {'width': 'px'} + ) + self.assertIsNone(errors) + + def test_multiple(self): + refMap, errors = self.mixin.parse_css_spec('width:15px;height:20.2em;') + self.assertDictEqual( + refMap, {'height': 'em', 'width': 'px'} + ) + self.assertIsNone(errors) + refMap, errors = self.mixin.parse_css_spec( + 'width:15px \t\t; height:20em' + ) + self.assertDictEqual( + refMap, {'height': 'em', 'width': 'px'} + ) + self.assertIsNone(errors) + + def test_errors(self): + refMap, errors = self.mixin.parse_css_spec('width:15pxfoo') + self.assertDictEqual( + refMap, {'width': 'px'} + ) + self.assertListEqual( + errors, [{'pos': 10, 'code': 'css-bad-content'}] + ) + refMap, errors = self.mixin.parse_css_spec('width:15px height:20em') + self.assertDictEqual( + refMap, {'height': 'em', 'width': 'px'} + ) + self.assertListEqual( + errors, [{'pos': 10, 'code': 'css-missing-semicolon'}] + ) + refMap, errors = self.mixin.parse_css_spec('witdth:15px') + self.assertIsNone(refMap) + self.assertIsNone(errors) + refMap, errors = self.mixin.parse_css_spec('width:1,5px') + self.assertIsNone(refMap) + self.assertIsNone(errors) + refMap, errors = self.mixin.parse_css_spec('width:1.5.1px') + self.assertIsNone(refMap) + self.assertIsNone(errors) + refMap, errors = self.mixin.parse_css_spec('width:1.px') + self.assertIsNone(refMap) + self.assertIsNone(errors) diff 
--git a/third_party/python/compare-locales/compare_locales/tests/test_compare.py b/third_party/python/compare-locales/compare_locales/tests/test_compare.py new file mode 100644 index 0000000000..acc47cff68 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_compare.py @@ -0,0 +1,229 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest + +from compare_locales import compare, paths + + +class TestTree(unittest.TestCase): + '''Test the Tree utility class + + Tree value classes need to be in-place editable + ''' + + def test_empty_dict(self): + tree = compare.Tree(dict) + self.assertEqual(list(tree.getContent()), []) + self.assertDictEqual( + tree.toJSON(), + {} + ) + + def test_disjoint_dict(self): + tree = compare.Tree(dict) + tree['one/entry']['leaf'] = 1 + tree['two/other']['leaf'] = 2 + self.assertEqual( + list(tree.getContent()), + [ + (0, 'key', ('one', 'entry')), + (1, 'value', {'leaf': 1}), + (0, 'key', ('two', 'other')), + (1, 'value', {'leaf': 2}) + ] + ) + self.assertDictEqual( + tree.toJSON(), + { + 'one/entry': + {'leaf': 1}, + 'two/other': + {'leaf': 2} + } + ) + self.assertMultiLineEqual( + str(tree), + '''\ +one/entry + {'leaf': 1} +two/other + {'leaf': 2}\ +''' + ) + + def test_overlapping_dict(self): + tree = compare.Tree(dict) + tree['one/entry']['leaf'] = 1 + tree['one/other']['leaf'] = 2 + self.assertEqual( + list(tree.getContent()), + [ + (0, 'key', ('one',)), + (1, 'key', ('entry',)), + (2, 'value', {'leaf': 1}), + (1, 'key', ('other',)), + (2, 'value', {'leaf': 2}) + ] + ) + self.assertDictEqual( + tree.toJSON(), + { + 'one': { + 'entry': + {'leaf': 1}, + 'other': + {'leaf': 2} + } + } + ) + + +class TestObserver(unittest.TestCase): + def test_simple(self): + obs = compare.Observer() + f = paths.File('/some/real/sub/path', 'de/sub/path', locale='de') + obs.notify('missingEntity', f, 'one') + obs.notify('missingEntity', f, 'two') + obs.updateStats(f, {'missing': 15}) + self.assertDictEqual(obs.toJSON(), { + 'summary': { + 'de': { + 'errors': 0, + 'warnings': 0, + 'missing': 15, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 0, + 'changed_w': 0, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + }, + 'details': { + 'de/sub/path': + [{'missingEntity': 'one'}, + {'missingEntity': 'two'}] + } + }) + + def test_module(self): + obs = compare.Observer() + f = paths.File('/some/real/sub/path', 'path', + module='sub', locale='de') + obs.notify('missingEntity', f, 'one') + obs.notify('obsoleteEntity', f, 'bar') + obs.notify('missingEntity', f, 'two') + obs.updateStats(f, {'missing': 15}) + self.assertDictEqual(obs.toJSON(), { + 'summary': { + 'de': { + 'errors': 0, + 'warnings': 0, + 'missing': 15, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 0, + 'changed_w': 0, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + }, + 'details': { + 'de/sub/path': + [ + {'missingEntity': 'one'}, + {'obsoleteEntity': 'bar'}, + {'missingEntity': 'two'}, + ] + } + }) + + +class TestAddRemove(unittest.TestCase): + + def _test(self, left, right, ref_actions): + ar = compare.AddRemove() + ar.set_left(left) + ar.set_right(right) + actions = list(ar) + self.assertListEqual(actions, ref_actions) + + def test_equal(self): + self._test(['z', 'a', 'p'], ['z', 'a', 'p'], [ + ('equal', 'z'), + ('equal', 'a'), + ('equal', 'p'), + 
]) + + def test_add_start(self): + self._test(['a', 'p'], ['z', 'a', 'p'], [ + ('add', 'z'), + ('equal', 'a'), + ('equal', 'p'), + ]) + + def test_add_middle(self): + self._test(['z', 'p'], ['z', 'a', 'p'], [ + ('equal', 'z'), + ('add', 'a'), + ('equal', 'p'), + ]) + + def test_add_end(self): + self._test(['z', 'a'], ['z', 'a', 'p'], [ + ('equal', 'z'), + ('equal', 'a'), + ('add', 'p'), + ]) + + def test_delete_start(self): + self._test(['z', 'a', 'p'], ['a', 'p'], [ + ('delete', 'z'), + ('equal', 'a'), + ('equal', 'p'), + ]) + + def test_delete_middle(self): + self._test(['z', 'a', 'p'], ['z', 'p'], [ + ('equal', 'z'), + ('delete', 'a'), + ('equal', 'p'), + ]) + + def test_delete_end(self): + self._test(['z', 'a', 'p'], ['z', 'a'], [ + ('equal', 'z'), + ('equal', 'a'), + ('delete', 'p'), + ]) + + def test_replace_start(self): + self._test(['b', 'a', 'p'], ['z', 'a', 'p'], [ + ('add', 'z'), + ('delete', 'b'), + ('equal', 'a'), + ('equal', 'p'), + ]) + + def test_replace_middle(self): + self._test(['z', 'b', 'p'], ['z', 'a', 'p'], [ + ('equal', 'z'), + ('add', 'a'), + ('delete', 'b'), + ('equal', 'p'), + ]) + + def test_replace_end(self): + self._test(['z', 'a', 'b'], ['z', 'a', 'p'], [ + ('equal', 'z'), + ('equal', 'a'), + ('add', 'p'), + ('delete', 'b'), + ]) diff --git a/third_party/python/compare-locales/compare_locales/tests/test_defines.py b/third_party/python/compare-locales/compare_locales/tests/test_defines.py new file mode 100644 index 0000000000..6f903d82d5 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_defines.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest + +from compare_locales.tests import ParserTestMixin, BaseHelper +from compare_locales.paths import File +from compare_locales.parser import ( + Comment, + DefinesInstruction, + Junk, + Whitespace, +) + + +mpl2 = '''\ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/.''' + + +class TestDefinesParser(ParserTestMixin, unittest.TestCase): + + filename = 'defines.inc' + + def testBrowser(self): + self._test(mpl2 + ''' +#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org + +# If non-English locales wish to credit multiple contributors, uncomment this +# variable definition and use the format specified. +# #define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Joe Solon</em:contributor> + +#unfilter emptyLines + +''', ( + (Comment, mpl2), + (Whitespace, '\n'), + (DefinesInstruction, 'filter emptyLines'), + (Whitespace, '\n\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n\n'), + (Comment, '#define'), + (Whitespace, '\n\n'), + (DefinesInstruction, 'unfilter emptyLines'), + (Junk, '\n\n'))) + + def testBrowserWithContributors(self): + self._test(mpl2 + ''' +#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org + +# If non-English locales wish to credit multiple contributors, uncomment this +# variable definition and use the format specified. 
+#define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Joe Solon</em:contributor> + +#unfilter emptyLines + +''', ( + (Comment, mpl2), + (Whitespace, '\n'), + (DefinesInstruction, 'filter emptyLines'), + (Whitespace, '\n\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n\n'), + ( + 'MOZ_LANGPACK_CONTRIBUTORS', + '<em:contributor>Joe Solon</em:contributor>', + 'non-English', + ), + (Whitespace, '\n\n'), + (DefinesInstruction, 'unfilter emptyLines'), + (Junk, '\n\n'))) + + def testCommentWithNonAsciiCharacters(self): + self._test(mpl2 + ''' +#filter emptyLines + +# e.g. #define seamonkey_l10n <DT><A HREF="urn:foo">SeaMonkey v češtině</a> +#define seamonkey_l10n_long + +#unfilter emptyLines + +''', ( + (Comment, mpl2), + (Whitespace, '\n'), + (DefinesInstruction, 'filter emptyLines'), + (Whitespace, '\n\n'), + ('seamonkey_l10n_long', '', 'češtině'), + (Whitespace, '\n\n'), + (DefinesInstruction, 'unfilter emptyLines'), + (Junk, '\n\n'))) + + def test_no_empty_lines(self): + self._test('''#define MOZ_LANGPACK_CREATOR mozilla.org +#define MOZ_LANGPACK_CREATOR mozilla.org +''', ( + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'))) + + def test_empty_line_between(self): + self._test('''#define MOZ_LANGPACK_CREATOR mozilla.org + +#define MOZ_LANGPACK_CREATOR mozilla.org +''', ( + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Junk, '\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'))) + + def test_empty_line_at_the_beginning(self): + self._test(''' +#define MOZ_LANGPACK_CREATOR mozilla.org +#define MOZ_LANGPACK_CREATOR mozilla.org +''', ( + (Junk, '\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'))) + + def test_filter_empty_lines(self): + self._test('''#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org +#define MOZ_LANGPACK_CREATOR mozilla.org +#unfilter emptyLines''', ( + (DefinesInstruction, 'filter emptyLines'), + (Whitespace, '\n\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'), + (DefinesInstruction, 'unfilter emptyLines'))) + + def test_unfilter_empty_lines_with_trailing(self): + self._test('''#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org +#define MOZ_LANGPACK_CREATOR mozilla.org +#unfilter emptyLines +''', ( + (DefinesInstruction, 'filter emptyLines'), + (Whitespace, '\n\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'), + ('MOZ_LANGPACK_CREATOR', 'mozilla.org'), + (Whitespace, '\n'), + (DefinesInstruction, 'unfilter emptyLines'), + (Whitespace, '\n'))) + + def testToolkit(self): + self._test('''#define MOZ_LANG_TITLE English (US) +''', ( + ('MOZ_LANG_TITLE', 'English (US)'), + (Whitespace, '\n'))) + + def testToolkitEmpty(self): + self._test('', tuple()) + + def test_empty_file(self): + '''Test that empty files generate errors + + defines.inc are interesting that way, as their + content is added to the generated file. 
+ ''' + self._test('\n', ((Junk, '\n'),)) + self._test('\n\n', ((Junk, '\n\n'),)) + self._test(' \n\n', ((Junk, ' \n\n'),)) + + def test_whitespace_value(self): + '''Test that there's only one whitespace between key and value + ''' + # funny formatting of trailing whitespace to make it explicit + # and flake-8 happy + self._test('''\ +#define one \n\ +#define two \n\ +#define tre \n\ +''', ( + ('one', ''), + (Whitespace, '\n'), + ('two', ' '), + (Whitespace, '\n'), + ('tre', ' '), + (Whitespace, '\n'),)) + + def test_standalone_comments(self): + self._test( + '''\ +#filter emptyLines +# One comment + +# Second comment + +#define foo +# bar comment +#define bar + +#unfilter emptyLines +''', + ( + (DefinesInstruction, 'filter emptyLines'), + (Whitespace, '\n'), + (Comment, 'One comment'), + (Whitespace, '\n\n'), + (Comment, 'Second comment'), + (Whitespace, '\n\n'), + ('foo', ''), + (Whitespace, '\n'), + ('bar', '', 'bar comment'), + (Whitespace, '\n\n'), + (DefinesInstruction, 'unfilter emptyLines'), + (Whitespace, '\n'), + ) + ) + + +class TestChecks(BaseHelper): + file = File('defines.inc', 'defines.inc') + refContent = b'''\ +#define foo bar +''' + + def test_ok(self): + self._test( + b'#define foo other', + tuple() + ) + + def test_bad_encoding(self): + self._test( + '#define foo touché'.encode('latin-1'), + ( + ( + "warning", + 17, + "\ufffd in: foo", + "encodings" + ), + ) + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/test_ini.py b/third_party/python/compare-locales/compare_locales/tests/test_ini.py new file mode 100644 index 0000000000..e5d68482c1 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_ini.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals +import unittest + +from compare_locales.tests import ParserTestMixin, BaseHelper +from compare_locales.paths import File +from compare_locales.parser import ( + Comment, + IniSection, + Junk, + Whitespace, +) + + +mpl2 = '''\ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. 
If a copy of the MPL was not distributed with this file, +; You can obtain one at http://mozilla.org/MPL/2.0/.''' + + +class TestIniParser(ParserTestMixin, unittest.TestCase): + + filename = 'foo.ini' + + def testSimpleHeader(self): + self._test('''; This file is in the UTF-8 encoding +[Strings] +TitleText=Some Title +''', ( + (Comment, 'UTF-8 encoding'), + (Whitespace, '\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'))) + + def testMPL2_Space_UTF(self): + self._test(mpl2 + ''' + +; This file is in the UTF-8 encoding +[Strings] +TitleText=Some Title +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (Comment, 'UTF-8'), + (Whitespace, '\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'))) + + def testMPL2_Space(self): + self._test(mpl2 + ''' + +[Strings] +TitleText=Some Title +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'))) + + def testMPL2_no_space(self): + self._test(mpl2 + ''' +[Strings] +TitleText=Some Title +''', ( + (Comment, mpl2), + (Whitespace, '\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'))) + + def testMPL2_MultiSpace(self): + self._test(mpl2 + ''' + +; more comments + +[Strings] +TitleText=Some Title +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (Comment, 'more comments'), + (Whitespace, '\n\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'))) + + def testMPL2_JunkBeforeCategory(self): + self._test(mpl2 + ''' +Junk +[Strings] +TitleText=Some Title +''', ( + (Comment, mpl2), + (Whitespace, '\n'), + (Junk, 'Junk\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'))) + + def test_TrailingComment(self): + self._test(mpl2 + ''' + +[Strings] +TitleText=Some Title +;Stray trailing comment +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n'), + (Comment, 'Stray trailing'), + (Whitespace, '\n'))) + + def test_SpacedTrailingComments(self): + self._test(mpl2 + ''' + +[Strings] +TitleText=Some Title + +;Stray trailing comment +;Second stray comment + +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n\n'), + (Comment, 'Second stray comment'), + (Whitespace, '\n\n'))) + + def test_TrailingCommentsAndJunk(self): + self._test(mpl2 + ''' + +[Strings] +TitleText=Some Title + +;Stray trailing comment +Junk +;Second stray comment + +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n\n'), + (Comment, 'Stray trailing'), + (Whitespace, '\n'), + (Junk, 'Junk\n'), + (Comment, 'Second stray comment'), + (Whitespace, '\n\n'))) + + def test_JunkInbetweenEntries(self): + self._test(mpl2 + ''' + +[Strings] +TitleText=Some Title + +Junk + +Good=other string +''', ( + (Comment, mpl2), + (Whitespace, '\n\n'), + (IniSection, 'Strings'), + (Whitespace, '\n'), + ('TitleText', 'Some Title'), + (Whitespace, '\n\n'), + (Junk, 'Junk\n\n'), + ('Good', 'other string'), + (Whitespace, '\n'))) + + def test_empty_file(self): + self._test('', tuple()) + self._test('\n', ((Whitespace, '\n'),)) + self._test('\n\n', ((Whitespace, '\n\n'),)) + 
self._test(' \n\n', ((Whitespace, ' \n\n'),)) + + +class TestChecks(BaseHelper): + file = File('foo.ini', 'foo.ini') + refContent = b'''\ +[Strings] +foo=good +''' + + def test_ok(self): + self._test( + b'[Strings]\nfoo=other', + tuple() + ) + + def test_bad_encoding(self): + self._test( + 'foo=touché'.encode('latin-1'), + ( + ( + "warning", + 9, + "\ufffd in: foo", + "encodings" + ), + ) + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/test_keyedtuple.py b/third_party/python/compare-locales/compare_locales/tests/test_keyedtuple.py new file mode 100644 index 0000000000..156a8e868c --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_keyedtuple.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +from __future__ import unicode_literals + +from collections import namedtuple +import unittest + +from compare_locales.keyedtuple import KeyedTuple + + +KeyedThing = namedtuple('KeyedThing', ['key', 'val']) + + +class TestKeyedTuple(unittest.TestCase): + def test_constructor(self): + keyedtuple = KeyedTuple([]) + self.assertEqual(keyedtuple, tuple()) + + def test_contains(self): + things = [KeyedThing('one', 'thing'), KeyedThing('two', 'things')] + keyedtuple = KeyedTuple(things) + self.assertNotIn(1, keyedtuple) + self.assertIn('one', keyedtuple) + self.assertIn(things[0], keyedtuple) + self.assertIn(things[1], keyedtuple) + self.assertNotIn(KeyedThing('three', 'stooges'), keyedtuple) + + def test_getitem(self): + things = [KeyedThing('one', 'thing'), KeyedThing('two', 'things')] + keyedtuple = KeyedTuple(things) + self.assertEqual(keyedtuple[0], things[0]) + self.assertEqual(keyedtuple[1], things[1]) + self.assertEqual(keyedtuple['one'], things[0]) + self.assertEqual(keyedtuple['two'], things[1]) + + def test_items(self): + things = [KeyedThing('one', 'thing'), KeyedThing('two', 'things')] + things.extend(things) + keyedtuple = KeyedTuple(things) + self.assertEqual(len(keyedtuple), 4) + items = list(keyedtuple.items()) + self.assertEqual(len(items), 4) + self.assertEqual( + keyedtuple, + tuple((v for k, v in items)) + ) + self.assertEqual( + ('one', 'two', 'one', 'two',), + tuple((k for k, v in items)) + ) diff --git a/third_party/python/compare-locales/compare_locales/tests/test_merge.py b/third_party/python/compare-locales/compare_locales/tests/test_merge.py new file mode 100644 index 0000000000..a10a04ca16 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_merge.py @@ -0,0 +1,1408 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
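
The `KeyedTuple` tests above index the same sequence both by position and by each entry's `key` attribute. A minimal usage sketch of that pattern, assuming `compare_locales` is importable and reusing the `KeyedThing` namedtuple from the test module (not part of the library itself):

```python
from collections import namedtuple

from compare_locales.keyedtuple import KeyedTuple

# Entries only need a `key` attribute; the tests use a small namedtuple.
KeyedThing = namedtuple('KeyedThing', ['key', 'val'])

things = KeyedTuple([KeyedThing('one', 'thing'), KeyedThing('two', 'things')])

assert things[0].val == 'thing'          # positional access, like a tuple
assert things['two'].val == 'things'     # keyed access, like a mapping
assert 'one' in things                   # membership works for keys and entries
assert [k for k, _ in things.items()] == ['one', 'two']
```
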
+ +from __future__ import absolute_import +import unittest +import filecmp +import os +from tempfile import mkdtemp +import shutil + +from compare_locales.parser import getParser +from compare_locales.paths import File +from compare_locales.compare.content import ContentComparer +from compare_locales.compare.observer import Observer +from compare_locales import mozpath + + +class ContentMixin(object): + extension = None # OVERLOAD + + @property + def ref(self): + return mozpath.join(self.tmp, "en-reference" + self.extension) + + @property + def l10n(self): + return mozpath.join(self.tmp, "l10n" + self.extension) + + def reference(self, content): + with open(self.ref, "w") as f: + f.write(content) + + def localized(self, content): + with open(self.l10n, "w") as f: + f.write(content) + + +class TestNonSupported(unittest.TestCase, ContentMixin): + extension = '.js' + + def setUp(self): + self.maxDiff = None + self.tmp = mkdtemp() + os.mkdir(mozpath.join(self.tmp, "merge")) + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + + def test_good(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = 'fooVal';""") + self.localized("""foo = 'lfoo';""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.js", ""), + File(self.l10n, "l10n.js", ""), + mozpath.join(self.tmp, "merge", "l10n.js")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': {}, + 'details': {} + } + ) + self.assertTrue(filecmp.cmp( + self.l10n, + mozpath.join(self.tmp, "merge", 'l10n.js')) + ) + + def test_missing(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = 'fooVal';""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.add(File(self.ref, "en-reference.js", ""), + File(self.l10n, "l10n.js", ""), + mozpath.join(self.tmp, "merge", "l10n.js")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': {}, + 'details': {'l10n.js': [{'missingFile': 'error'}]} + } + ) + self.assertTrue(filecmp.cmp( + self.ref, + mozpath.join(self.tmp, "merge", 'l10n.js')) + ) + + def test_missing_ignored(self): + + def ignore(*args): + return 'ignore' + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = 'fooVal';""") + cc = ContentComparer() + cc.observers.append(Observer(filter=ignore)) + cc.add(File(self.ref, "en-reference.js", ""), + File(self.l10n, "l10n.js", ""), + mozpath.join(self.tmp, "merge", "l10n.js")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': {}, + 'details': {} + } + ) + self.assertTrue(filecmp.cmp( + self.ref, + mozpath.join(self.tmp, "merge", 'l10n.js')) + ) + + +class TestDefines(unittest.TestCase, ContentMixin): + '''Test case for parsers with just CAN_COPY''' + extension = '.inc' + + def setUp(self): + self.maxDiff = None + self.tmp = mkdtemp() + os.mkdir(mozpath.join(self.tmp, "merge")) + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + + def testGood(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org + +#define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Suzy Solon</em:contributor> + +#unfilter emptyLines +""") + self.localized("""#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org + +#define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Jane Doe</em:contributor> + +#unfilter emptyLines +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.inc", ""), + File(self.l10n, "l10n.inc", ""), + 
mozpath.join(self.tmp, "merge", "l10n.inc")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 2, + 'unchanged': 1, + 'unchanged_w': 1, + 'keys': 0, + }}, + 'details': {} + } + ) + self.assertTrue(filecmp.cmp( + self.l10n, + mozpath.join(self.tmp, "merge", 'l10n.inc')) + ) + + def testMissing(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org + +#define MOZ_LANGPACK_CONTRIBUTORS <em:contributor>Suzy Solon</em:contributor> + +#unfilter emptyLines +""") + self.localized("""#filter emptyLines + +#define MOZ_LANGPACK_CREATOR mozilla.org + +#unfilter emptyLines +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.inc", ""), + File(self.l10n, "l10n.inc", ""), + mozpath.join(self.tmp, "merge", "l10n.inc")) + self.assertDictEqual( + cc.observers.toJSON(), + { + 'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 1, + 'missing_w': 2, + 'report': 0, + 'obsolete': 0, + 'changed': 0, + 'changed_w': 0, + 'unchanged': 1, + 'unchanged_w': 1, + 'keys': 0, + }}, + 'details': + { + 'l10n.inc': [ + {'missingEntity': 'MOZ_LANGPACK_CONTRIBUTORS'} + ] + } + } + ) + self.assertTrue(filecmp.cmp( + self.ref, + mozpath.join(self.tmp, "merge", 'l10n.inc')) + ) + + +class TestProperties(unittest.TestCase, ContentMixin): + extension = '.properties' + + def setUp(self): + self.maxDiff = None + self.tmp = mkdtemp() + os.mkdir(mozpath.join(self.tmp, "merge")) + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + + def testGood(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal word +bar = barVal word +eff = effVal""") + self.localized("""foo = lFoo +bar = lBar +eff = lEff word +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 5, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + + }}, + 'details': {} + } + ) + self.assertTrue(filecmp.cmp( + self.l10n, + mozpath.join(self.tmp, "merge", 'l10n.properties')) + ) + + def testMissing(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = barVal +eff = effVal""") + self.localized("""bar = lBar +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 2, + 'missing_w': 2, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 1, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.properties': [ + {'missingEntity': u'foo'}, + {'missingEntity': u'eff'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + entities = p.parse() + self.assertEqual(list(entities.keys()), ["bar", "foo", "eff"]) + + def 
test_missing_file(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = barVal +eff = effVal""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.add(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 3, + 'missing_w': 3, + 'report': 0, + 'obsolete': 0, + 'changed': 0, + 'changed_w': 0, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.properties': [ + {'missingFile': 'error'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(filecmp.cmp(self.ref, mergefile)) + + def testError(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = %d barVal +eff = effVal""") + self.localized("""\ +bar = %S lBar +eff = leffVal +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 1, + 'warnings': 0, + 'missing': 1, + 'missing_w': 1, + 'report': 0, + 'obsolete': 0, + 'changed': 2, + 'changed_w': 3, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.properties': [ + {'missingEntity': u'foo'}, + {'error': u'argument 1 `S` should be `d` ' + u'at line 1, column 7 for bar'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + entities = p.parse() + self.assertEqual(list(entities.keys()), ["eff", "foo", "bar"]) + self.assertEqual(entities['bar'].val, '%d barVal') + + def testObsolete(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +eff = effVal""") + self.localized("""foo = fooVal +other = obsolete +eff = leffVal +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 1, + 'changed': 1, + 'changed_w': 1, + 'unchanged': 1, + 'unchanged_w': 1, + 'keys': 0, + }}, + 'details': { + 'l10n.properties': [ + {'obsoleteEntity': u'other'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(filecmp.cmp(self.l10n, mergefile)) + + def test_obsolete_file(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.localized("""foo = fooVal +eff = leffVal +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.remove(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {}, + 'details': { + 'l10n.properties': [ + {'obsoleteFile': u'error'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(os.path.isfile(mergefile)) + + def test_duplicate(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = barVal +eff = effVal +foo = other val for 
foo""") + self.localized("""foo = localized +bar = lBar +eff = localized eff +bar = duplicated bar +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", ""), + mozpath.join(self.tmp, "merge", "l10n.properties")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 1, + 'warnings': 1, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 6, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.properties': [ + {'warning': u'foo occurs 2 times'}, + {'error': u'bar occurs 2 times'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(filecmp.cmp(self.l10n, mergefile)) + + +class TestDTD(unittest.TestCase, ContentMixin): + extension = '.dtd' + + def setUp(self): + self.maxDiff = None + self.tmp = mkdtemp() + os.mkdir(mozpath.join(self.tmp, "merge")) + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + + def testGood(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'barVal'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY foo 'lFoo'> +<!ENTITY bar 'lBar'> +<!ENTITY eff 'lEff'> +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", ""), + mozpath.join(self.tmp, "merge", "l10n.dtd")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 3, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': {} + } + ) + self.assertTrue(filecmp.cmp( + self.l10n, + mozpath.join(self.tmp, "merge", 'l10n.dtd')) + ) + + def testMissing(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'barVal'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY bar 'lBar'> +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", ""), + mozpath.join(self.tmp, "merge", "l10n.dtd")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 2, + 'missing_w': 2, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 1, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.dtd': [ + {'missingEntity': u'foo'}, + {'missingEntity': u'eff'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + entities = p.parse() + self.assertEqual(list(entities.keys()), ["bar", "foo", "eff"]) + + def testJunk(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'barVal'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY foo 'fooVal'> +<!ENTY bar 'gimmick'> +<!ENTITY eff 'effVal'> +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", ""), + mozpath.join(self.tmp, "merge", "l10n.dtd")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 1, + 'warnings': 0, + 'missing': 1, + 'missing_w': 1, + 'report': 0, + 'obsolete': 0, + 'changed': 0, + 
'changed_w': 0, + 'unchanged': 2, + 'unchanged_w': 2, + 'keys': 0, + }}, + 'details': { + 'l10n.dtd': [ + {'error': u'Unparsed content "<!ENTY bar ' + u'\'gimmick\'>\n" ' + u'from line 2 column 1 to ' + u'line 3 column 1'}, + {'missingEntity': u'bar'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + entities = p.parse() + self.assertEqual(list(entities.keys()), ["foo", "eff", "bar"]) + + def test_reference_junk(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENT bar 'bad val'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY foo 'fooVal'> +<!ENTITY eff 'effVal'> +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", ""), + mozpath.join(self.tmp, "merge", "l10n.dtd")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 1, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 0, + 'changed_w': 0, + 'unchanged': 2, + 'unchanged_w': 2, + 'keys': 0, + }}, + 'details': { + 'l10n.dtd': [ + {'warning': 'Parser error in en-US'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd") + self.assertTrue(filecmp.cmp(self.l10n, mergefile)) + + def test_reference_xml_error(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'bad &val'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'good val'> +<!ENTITY eff 'effVal'> +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", ""), + mozpath.join(self.tmp, "merge", "l10n.dtd")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 1, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 2, + 'unchanged': 2, + 'unchanged_w': 2, + 'keys': 0, + }}, + 'details': { + 'l10n.dtd': [ + {'warning': u"can't parse en-US value at line 1, " + u"column 0 for bar"}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.dtd") + self.assertTrue(filecmp.cmp(self.l10n, mergefile)) + + +class TestFluent(unittest.TestCase): + maxDiff = None # we got big dictionaries to compare + + def reference(self, content): + self.ref = os.path.join(self.tmp, "en-reference.ftl") + with open(self.ref, "w") as f: + f.write(content) + + def localized(self, content): + self.l10n = os.path.join(self.tmp, "l10n.ftl") + with open(self.l10n, "w") as f: + f.write(content) + + def setUp(self): + self.tmp = mkdtemp() + os.mkdir(os.path.join(self.tmp, "merge")) + self.ref = self.l10n = None + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + del self.ref + del self.l10n + + def testGood(self): + self.reference("""\ +foo = fooVal +bar = barVal +-eff = effVal +""") + self.localized("""\ +foo = lFoo +bar = lBar +-eff = lEff +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 3, + 'unchanged': 0, + 'unchanged_w': 0, + 
'keys': 0, + }}, + 'details': {} + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def testMissing(self): + self.reference("""\ +foo = fooVal +bar = barVal +-baz = bazVal +eff = effVal +""") + self.localized("""\ +foo = lFoo +eff = lEff +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': { + 'l10n.ftl': [ + {'missingEntity': u'bar'}, + {'missingEntity': u'-baz'}, + ], + }, + 'summary': { + None: { + 'errors': 0, + 'warnings': 0, + 'missing': 2, + 'missing_w': 2, + 'report': 0, + 'obsolete': 0, + 'changed': 2, + 'changed_w': 2, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def testBroken(self): + self.reference("""\ +foo = fooVal +bar = barVal +eff = effVal +""") + self.localized("""\ +-- Invalid Comment +foo = lFoo +bar lBar +eff = lEff { +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': { + 'l10n.ftl': [ + {'error': u'Unparsed content "-- Invalid Comment" ' + u'from line 1 column 1 ' + u'to line 1 column 19'}, + {'error': u'Unparsed content "bar lBar" ' + u'from line 3 column 1 ' + u'to line 3 column 9'}, + {'error': u'Unparsed content "eff = lEff {" ' + u'from line 4 column 1 ' + u'to line 4 column 13'}, + {'missingEntity': u'bar'}, + {'missingEntity': u'eff'}, + ], + }, + 'summary': { + None: { + 'errors': 3, + 'warnings': 0, + 'missing': 2, + 'missing_w': 2, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 1, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(os.path.exists(mergepath)) + + p = getParser(mergepath) + p.readFile(mergepath) + merged_entities = p.parse() + self.assertEqual(list(merged_entities.keys()), ["foo"]) + merged_foo = merged_entities['foo'] + + # foo should be l10n + p.readFile(self.l10n) + l10n_entities = p.parse() + l10n_foo = l10n_entities['foo'] + self.assertTrue(merged_foo.equals(l10n_foo)) + + def testMatchingReferences(self): + self.reference("""\ +foo = Reference { bar } +""") + self.localized("""\ +foo = Localized { bar } +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': {}, + 'summary': { + None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 1, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def testMismatchingReferences(self): + self.reference("""\ +foo = Reference { bar } +bar = Reference { baz } +baz = Reference +""") + self.localized("""\ +foo = Localized { qux } +bar = 
Localized +baz = Localized { qux } +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': { + 'l10n.ftl': [ + { + 'warning': + u'Missing message reference: bar ' + u'at line 1, column 1 for foo' + }, + { + 'warning': + u'Obsolete message reference: qux ' + u'at line 1, column 19 for foo' + }, + { + 'warning': + u'Missing message reference: baz ' + u'at line 2, column 1 for bar' + }, + { + 'warning': + u'Obsolete message reference: qux ' + u'at line 3, column 19 for baz' + }, + ], + }, + 'summary': { + None: { + 'errors': 0, + 'warnings': 4, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 3, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def testMismatchingAttributes(self): + self.reference(""" +foo = Foo +bar = Bar + .tender = Attribute value +eff = Eff +""") + self.localized("""\ +foo = lFoo + .obsolete = attr +bar = lBar +eff = lEff +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': { + 'l10n.ftl': [ + { + 'error': + u'Obsolete attribute: ' + 'obsolete at line 2, column 3 for foo' + }, + { + 'error': + u'Missing attribute: tender at line 3,' + ' column 1 for bar', + }, + ], + }, + 'summary': { + None: { + 'errors': 2, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 5, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(os.path.exists(mergepath)) + + p = getParser(mergepath) + p.readFile(mergepath) + merged_entities = p.parse() + self.assertEqual(list(merged_entities.keys()), ["eff"]) + merged_eff = merged_entities['eff'] + + # eff should be l10n + p.readFile(self.l10n) + l10n_entities = p.parse() + l10n_eff = l10n_entities['eff'] + self.assertTrue(merged_eff.equals(l10n_eff)) + + def test_term_attributes(self): + self.reference(""" +-foo = Foo +-bar = Bar +-baz = Baz + .attr = Baz Attribute +-qux = Qux + .attr = Qux Attribute +-missing = Missing + .attr = An Attribute +""") + self.localized("""\ +-foo = Localized Foo +-bar = Localized Bar + .attr = Locale-specific Bar Attribute +-baz = Localized Baz +-qux = Localized Qux + .other = Locale-specific Qux Attribute +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': { + 'l10n.ftl': [ + {'missingEntity': u'-missing'}, + ], + }, + 'summary': { + None: { + 'errors': 0, + 'warnings': 0, + 'missing': 1, + 'missing_w': 1, + 'report': 0, + 'obsolete': 0, + 'changed': 4, + 'changed_w': 4, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def 
testMismatchingValues(self): + self.reference(""" +foo = Foo + .foottr = something +bar = + .tender = Attribute value +""") + self.localized("""\ +foo = + .foottr = attr +bar = lBar + .tender = localized +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': { + 'l10n.ftl': [ + { + 'error': + u'Missing value at line 1, column 1 for foo' + }, + { + 'error': + u'Obsolete value at line 3, column 7 for bar', + }, + ] + }, + 'summary': { + None: { + 'errors': 2, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 2, + 'changed_w': 4, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(os.path.exists(mergepath)) + + p = getParser(mergepath) + p.readFile(mergepath) + merged_entities = p.parse() + self.assertEqual(merged_entities, tuple()) + + def testMissingGroupComment(self): + self.reference("""\ +foo = fooVal + +## Group Comment +bar = barVal +""") + self.localized("""\ +foo = lFoo +bar = lBar +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': {}, + 'summary': { + None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 2, + 'changed_w': 2, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def testMissingAttachedComment(self): + self.reference("""\ +foo = fooVal + +# Attached Comment +bar = barVal +""") + self.localized("""\ +foo = lFoo +bar = barVal +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': {}, + 'summary': { + None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 1, + 'unchanged': 1, + 'unchanged_w': 1, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def testObsoleteStandaloneComment(self): + self.reference("""\ +foo = fooVal +bar = barVal +""") + self.localized("""\ +foo = lFoo + +# Standalone Comment + +bar = lBar +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + + self.assertDictEqual( + cc.observers.toJSON(), + { + 'details': {}, + 'summary': { + None: { + 'errors': 0, + 'warnings': 0, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 2, + 'changed_w': 2, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + } + } + } + ) + + # validate merge results + mergepath = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergepath)) + + def 
test_duplicate(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = barVal +eff = effVal +foo = other val for foo""") + self.localized("""foo = localized +bar = lBar +eff = localized eff +bar = duplicated bar +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 1, + 'warnings': 1, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 3, + 'changed_w': 6, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.ftl': [ + {'warning': u'foo occurs 2 times'}, + {'error': u'bar occurs 2 times'}] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergefile)) + + def test_duplicate_attributes(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal + .attr = good""") + self.localized("""foo = localized + .attr = not + .attr = so + .attr = good +""") + cc = ContentComparer() + cc.observers.append(Observer()) + cc.compare(File(self.ref, "en-reference.ftl", ""), + File(self.l10n, "l10n.ftl", ""), + mozpath.join(self.tmp, "merge", "l10n.ftl")) + self.assertDictEqual( + cc.observers.toJSON(), + {'summary': + {None: { + 'errors': 0, + 'warnings': 3, + 'missing': 0, + 'missing_w': 0, + 'report': 0, + 'obsolete': 0, + 'changed': 1, + 'changed_w': 2, + 'unchanged': 0, + 'unchanged_w': 0, + 'keys': 0, + }}, + 'details': { + 'l10n.ftl': [ + {'warning': + u'Attribute "attr" is duplicated ' + u'at line 2, column 5 for foo' + }, + {'warning': + u'Attribute "attr" is duplicated ' + u'at line 3, column 5 for foo' + }, + {'warning': + u'Attribute "attr" is duplicated ' + u'at line 4, column 5 for foo' + }, + ] + } + }) + mergefile = mozpath.join(self.tmp, "merge", "l10n.ftl") + self.assertTrue(filecmp.cmp(self.l10n, mergefile)) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/python/compare-locales/compare_locales/tests/test_mozpath.py b/third_party/python/compare-locales/compare_locales/tests/test_mozpath.py new file mode 100644 index 0000000000..d4bf9ec4b2 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_mozpath.py @@ -0,0 +1,139 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
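
The merge tests above all follow the same driver pattern: build a `ContentComparer`, attach an `Observer`, and compare a reference file against a localization while writing the l10n-merge output to a third path. A condensed sketch of that pattern, mirroring the calls the tests make rather than a documented API surface; the scratch paths and FTL contents are made up for illustration:

```python
import os
import tempfile

from compare_locales.compare.content import ContentComparer
from compare_locales.compare.observer import Observer
from compare_locales.paths import File

# Hypothetical reference and localization files.
tmp = tempfile.mkdtemp()
ref_path = os.path.join(tmp, 'en-reference.ftl')
l10n_path = os.path.join(tmp, 'l10n.ftl')
with open(ref_path, 'w') as f:
    f.write('foo = fooVal\nbar = barVal\n')
with open(l10n_path, 'w') as f:
    f.write('foo = lFoo\n')

cc = ContentComparer()
cc.observers.append(Observer())
cc.compare(File(ref_path, 'en-reference.ftl', ''),
           File(l10n_path, 'l10n.ftl', ''),
           os.path.join(tmp, 'merged.ftl'))  # merge output padded with missing strings

# As in the tests, the observer aggregates a summary plus per-file details;
# here the details should flag 'bar' as a missingEntity for l10n.ftl.
print(cc.observers.toJSON())
```
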
+ +from __future__ import absolute_import +from compare_locales.mozpath import ( + relpath, + join, + normpath, + dirname, + commonprefix, + basename, + split, + splitext, + basedir, + match, + rebase, +) +import unittest +import os + + +class TestPath(unittest.TestCase): + SEP = os.sep + + def test_relpath(self): + self.assertEqual(relpath('foo', 'foo'), '') + self.assertEqual(relpath(self.SEP.join(('foo', 'bar')), 'foo/bar'), '') + self.assertEqual(relpath(self.SEP.join(('foo', 'bar')), 'foo'), 'bar') + self.assertEqual(relpath(self.SEP.join(('foo', 'bar', 'baz')), 'foo'), + 'bar/baz') + self.assertEqual(relpath(self.SEP.join(('foo', 'bar')), 'foo/bar/baz'), + '..') + self.assertEqual(relpath(self.SEP.join(('foo', 'bar')), 'foo/baz'), + '../bar') + self.assertEqual(relpath('foo/', 'foo'), '') + self.assertEqual(relpath('foo/bar/', 'foo'), 'bar') + + def test_join(self): + self.assertEqual(join('foo', 'bar', 'baz'), 'foo/bar/baz') + self.assertEqual(join('foo', '', 'bar'), 'foo/bar') + self.assertEqual(join('', 'foo', 'bar'), 'foo/bar') + self.assertEqual(join('', 'foo', '/bar'), '/bar') + + def test_normpath(self): + self.assertEqual(normpath(self.SEP.join(('foo', 'bar', 'baz', + '..', 'qux'))), 'foo/bar/qux') + + def test_dirname(self): + self.assertEqual(dirname('foo/bar/baz'), 'foo/bar') + self.assertEqual(dirname('foo/bar'), 'foo') + self.assertEqual(dirname('foo'), '') + self.assertEqual(dirname('foo/bar/'), 'foo/bar') + + def test_commonprefix(self): + self.assertEqual(commonprefix([self.SEP.join(('foo', 'bar', 'baz')), + 'foo/qux', 'foo/baz/qux']), 'foo/') + self.assertEqual(commonprefix([self.SEP.join(('foo', 'bar', 'baz')), + 'foo/qux', 'baz/qux']), '') + + def test_basename(self): + self.assertEqual(basename('foo/bar/baz'), 'baz') + self.assertEqual(basename('foo/bar'), 'bar') + self.assertEqual(basename('foo'), 'foo') + self.assertEqual(basename('foo/bar/'), '') + + def test_split(self): + self.assertEqual(split(self.SEP.join(('foo', 'bar', 'baz'))), + ['foo', 'bar', 'baz']) + + def test_splitext(self): + self.assertEqual(splitext(self.SEP.join(('foo', 'bar', 'baz.qux'))), + ('foo/bar/baz', '.qux')) + + def test_basedir(self): + foobarbaz = self.SEP.join(('foo', 'bar', 'baz')) + self.assertEqual(basedir(foobarbaz, ['foo', 'bar', 'baz']), 'foo') + self.assertEqual(basedir(foobarbaz, ['foo', 'foo/bar', 'baz']), + 'foo/bar') + self.assertEqual(basedir(foobarbaz, ['foo/bar', 'foo', 'baz']), + 'foo/bar') + self.assertEqual(basedir(foobarbaz, ['foo', 'bar', '']), 'foo') + self.assertEqual(basedir(foobarbaz, ['bar', 'baz', '']), '') + + def test_match(self): + self.assertTrue(match('foo', '')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/bar')) + self.assertTrue(match('foo/bar/baz.qux', 'foo')) + self.assertTrue(match('foo', '*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/bar/*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/bar/*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/bar/*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/bar/*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/*/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', '*/bar/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', '*/*/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', '*/*/*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/*/*')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/*/*.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/b*/*z.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/b*r/ba*z.qux')) + self.assertFalse(match('foo/bar/baz.qux', 
'foo/b*z/ba*r.qux')) + self.assertTrue(match('foo/bar/baz.qux', '**')) + self.assertTrue(match('foo/bar/baz.qux', '**/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', '**/bar/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/**/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/**/*.qux')) + self.assertTrue(match('foo/bar/baz.qux', '**/foo/bar/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/**/bar/baz.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/**/bar/*.qux')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/**/*.qux')) + self.assertTrue(match('foo/bar/baz.qux', '**/*.qux')) + self.assertFalse(match('foo/bar/baz.qux', '**.qux')) + self.assertFalse(match('foo/bar', 'foo/*/bar')) + self.assertTrue(match('foo/bar/baz.qux', 'foo/**/bar/**')) + self.assertFalse(match('foo/nobar/baz.qux', 'foo/**/bar/**')) + self.assertTrue(match('foo/bar', 'foo/**/bar/**')) + + def test_rebase(self): + self.assertEqual(rebase('foo', 'foo/bar', 'bar/baz'), 'baz') + self.assertEqual(rebase('foo', 'foo', 'bar/baz'), 'bar/baz') + self.assertEqual(rebase('foo/bar', 'foo', 'baz'), 'bar/baz') + + +if os.altsep: + class TestAltPath(TestPath): + SEP = os.altsep + + class TestReverseAltPath(TestPath): + def setUp(self): + sep = os.sep + os.sep = os.altsep + os.altsep = sep + + def tearDown(self): + self.setUp() + + class TestAltReverseAltPath(TestReverseAltPath): + SEP = os.altsep diff --git a/third_party/python/compare-locales/compare_locales/tests/test_parser.py b/third_party/python/compare-locales/compare_locales/tests/test_parser.py new file mode 100644 index 0000000000..38fe642ddf --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_parser.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import pkg_resources +import shutil +import tempfile +import textwrap +import unittest + +from compare_locales import parser, mozpath + + +class TestParserContext(unittest.TestCase): + def test_linecol(self): + "Should return 1-based line and column numbers." + ctx = parser.Parser.Context('''first line +second line +third line +''') + self.assertEqual( + ctx.linecol(0), + (1, 1) + ) + self.assertEqual( + ctx.linecol(1), + (1, 2) + ) + self.assertEqual( + ctx.linecol(len('first line')), + (1, len('first line') + 1) + ) + self.assertEqual( + ctx.linecol(len('first line') + 1), + (2, 1) + ) + self.assertEqual( + ctx.linecol(len(ctx.contents)), + (4, 1) + ) + + def test_empty_parser(self): + p = parser.Parser() + entities = p.parse() + self.assertTupleEqual( + entities, + tuple() + ) + + +class TestOffsetComment(unittest.TestCase): + def test_offset(self): + ctx = parser.Parser.Context(textwrap.dedent('''\ + #foo + #bar + # baz + ''' + )) # noqa + offset_comment = parser.OffsetComment(ctx, (0, len(ctx.contents))) + self.assertEqual( + offset_comment.val, + textwrap.dedent('''\ + foo + bar + baz + ''') + ) + + +class TestUniversalNewlines(unittest.TestCase): + def setUp(self): + '''Create a parser for this test. 
+ ''' + self.parser = parser.Parser() + self.dir = tempfile.mkdtemp() + + def tearDown(self): + 'tear down this test' + del self.parser + shutil.rmtree(self.dir) + + def test_universal_newlines(self): + f = mozpath.join(self.dir, 'file') + with open(f, 'wb') as fh: + fh.write(b'one\ntwo\rthree\r\n') + self.parser.readFile(f) + self.assertEqual( + self.parser.ctx.contents, + 'one\ntwo\nthree\n') + + +class TestPlugins(unittest.TestCase): + def setUp(self): + self.old_working_set_state = pkg_resources.working_set.__getstate__() + distribution = pkg_resources.Distribution(__file__) + entry_point = pkg_resources.EntryPoint.parse( + 'test_parser = compare_locales.tests.test_parser:DummyParser', + dist=distribution + ) + distribution._ep_map = { + 'compare_locales.parsers': { + 'test_parser': entry_point + } + } + pkg_resources.working_set.add(distribution) + + def tearDown(self): + pkg_resources.working_set.__setstate__(self.old_working_set_state) + + def test_dummy_parser(self): + p = parser.getParser('some/weird/file.ext') + self.assertIsInstance(p, DummyParser) + + +class DummyParser(parser.Parser): + def use(self, path): + return path.endswith('weird/file.ext') diff --git a/third_party/python/compare-locales/compare_locales/tests/test_util.py b/third_party/python/compare-locales/compare_locales/tests/test_util.py new file mode 100644 index 0000000000..f549cd2c67 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/tests/test_util.py @@ -0,0 +1,30 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import +import unittest + +from compare_locales import util + + +class ParseLocalesTest(unittest.TestCase): + def test_empty(self): + self.assertEqual(util.parseLocales(''), []) + + def test_all(self): + self.assertEqual(util.parseLocales('''af +de'''), ['af', 'de']) + + def test_shipped(self): + self.assertEqual(util.parseLocales('''af +ja win mac +de'''), ['af', 'de', 'ja']) + + def test_sparse(self): + self.assertEqual(util.parseLocales(''' +af + +de + +'''), ['af', 'de']) diff --git a/third_party/python/compare-locales/compare_locales/util.py b/third_party/python/compare-locales/compare_locales/util.py new file mode 100644 index 0000000000..71eadd8749 --- /dev/null +++ b/third_party/python/compare-locales/compare_locales/util.py @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This file is shared between compare-locales and locale-inspector +# test_util is in compare-locales only, for the sake of easy +# development. 
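
The `parseLocales` helper defined just below (and exercised in `test_util.py` above) takes the raw contents of an `all-locales` file, where each non-empty line starts with a locale code optionally followed by platform qualifiers, and returns the sorted locale codes. A quick usage sketch, assuming the package is importable:

```python
from compare_locales.util import parseLocales

content = '''af
ja win mac
de
'''
# Platform qualifiers after the locale code are ignored; output is sorted.
assert parseLocales(content) == ['af', 'de', 'ja']
```
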
+ + +def parseLocales(content): + return sorted(l.split()[0] for l in content.splitlines() if l) diff --git a/third_party/python/compare-locales/setup.cfg b/third_party/python/compare-locales/setup.cfg new file mode 100644 index 0000000000..adf5ed72aa --- /dev/null +++ b/third_party/python/compare-locales/setup.cfg @@ -0,0 +1,7 @@ +[bdist_wheel] +universal = 1 + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/third_party/python/compare-locales/setup.py b/third_party/python/compare-locales/setup.py new file mode 100755 index 0000000000..b273929dce --- /dev/null +++ b/third_party/python/compare-locales/setup.py @@ -0,0 +1,62 @@ +from __future__ import absolute_import + +from setuptools import setup, find_packages + +import sys +import os +sys.path.insert(0, os.path.dirname(__file__)) + +from compare_locales import version + +this_directory = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(this_directory, 'README.md'), 'rb') as f: + long_description = f.read().decode('utf-8') + +CLASSIFIERS = """\ +Development Status :: 5 - Production/Stable +Intended Audience :: Developers +License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Operating System :: OS Independent +Programming Language :: Python +Programming Language :: Python :: 2 +Programming Language :: Python :: 2.7 +Programming Language :: Python :: 3 +Programming Language :: Python :: 3.5 +Programming Language :: Python :: 3.6 +Programming Language :: Python :: 3.7 +Topic :: Software Development :: Libraries :: Python Modules +Topic :: Software Development :: Localization +Topic :: Software Development :: Testing\ +""" + +setup(name="compare-locales", + version=version, + author="Axel Hecht", + author_email="axel@mozilla.com", + description='Lint Mozilla localizations', + long_description=long_description, + long_description_content_type='text/markdown', + license="MPL 2.0", + classifiers=CLASSIFIERS.split("\n"), + platforms=["any"], + python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4', + entry_points={ + 'console_scripts': + [ + 'compare-locales = compare_locales.commands:CompareLocales.call', + 'moz-l10n-lint = compare_locales.lint.cli:main', + ], + }, + packages=find_packages(), + package_data={ + 'compare_locales.tests': ['data/*.properties', 'data/*.dtd'] + }, + install_requires=[ + 'fluent.syntax >=0.18.0, <0.19', + 'pytoml', + 'six', + ], + tests_require=[ + 'mock<4.0', + ], + test_suite='compare_locales.tests') |
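
The `TestPlugins` case in `test_parser.py` above registers a parser through the `compare_locales.parsers` entry point group, which suggests how a separate package could plug its own parser into `parser.getParser`. A hedged sketch of such a `setup.py`; the project name and `myproject.parsers:WeirdExtParser` module path are assumptions for illustration, not part of compare-locales:

```python
# setup.py of a hypothetical third-party package providing an extra parser.
# The entry point group mirrors the one used in TestPlugins above.
from setuptools import setup, find_packages

setup(
    name='compare-locales-weird-ext',
    version='0.1',
    packages=find_packages(),
    install_requires=['compare-locales'],
    entry_points={
        'compare_locales.parsers': [
            'weird_ext = myproject.parsers:WeirdExtParser',
        ],
    },
)
```

With such a package installed, `parser.getParser('some/weird/file.ext')` would pick the plugin up whenever its `use()` method claims the path, the same way the dummy parser in the tests does.
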