summaryrefslogtreecommitdiffstats
path: root/third_party/python/compare-locales/compare_locales/compare/content.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/compare-locales/compare_locales/compare/content.py')
-rw-r--r--third_party/python/compare-locales/compare_locales/compare/content.py307
1 files changed, 307 insertions, 0 deletions
diff --git a/third_party/python/compare-locales/compare_locales/compare/content.py b/third_party/python/compare-locales/compare_locales/compare/content.py
new file mode 100644
index 0000000000..03ba222d8e
--- /dev/null
+++ b/third_party/python/compare-locales/compare_locales/compare/content.py
@@ -0,0 +1,307 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mozilla l10n compare locales tool'
+
+from __future__ import absolute_import
+from __future__ import print_function
+import codecs
+import os
+import shutil
+import re
+
+from compare_locales import parser
+from compare_locales import mozpath
+from compare_locales.checks import getChecker, EntityPos
+from compare_locales.keyedtuple import KeyedTuple
+
+from .observer import ObserverList
+from .utils import AddRemove
+
+
+class ContentComparer:
+ keyRE = re.compile('[kK]ey')
+ nl = re.compile('\n', re.M)
+
+ def __init__(self, quiet=0):
+ '''Create a ContentComparer.
+ observer is usually a instance of Observer. The return values
+ of the notify method are used to control the handling of missing
+ entities.
+ '''
+ self.observers = ObserverList(quiet=quiet)
+
+ def create_merge_dir(self, merge_file):
+ outdir = mozpath.dirname(merge_file)
+ if not os.path.isdir(outdir):
+ os.makedirs(outdir)
+
+ def merge(self, ref_entities, ref_file, l10n_file, merge_file,
+ missing, skips, ctx, capabilities, encoding):
+ '''Create localized file in merge dir
+
+ `ref_entities` and `ref_map` are the parser result of the
+ reference file
+ `ref_file` and `l10n_file` are the File objects for the reference and
+ the l10n file, resp.
+ `merge_file` is the output path for the generated content. This is None
+ if we're just comparing or validating.
+ `missing` are the missing messages in l10n - potentially copied from
+ reference
+ `skips` are entries to be dropped from the localized file
+ `ctx` is the parsing context
+ `capabilities` are the capabilities for the merge algorithm
+ `encoding` is the encoding to be used when serializing, usually utf-8
+ '''
+
+ if not merge_file:
+ return
+
+ if capabilities == parser.CAN_NONE:
+ return
+
+ self.create_merge_dir(merge_file)
+
+ if capabilities & parser.CAN_COPY:
+ # copy the l10n file if it's good, or the reference file if not
+ if skips or missing:
+ src = ref_file.fullpath
+ else:
+ src = l10n_file.fullpath
+ shutil.copyfile(src, merge_file)
+ print("copied reference to " + merge_file)
+ return
+
+ if not (capabilities & parser.CAN_SKIP):
+ return
+
+ # Start with None in case the merge file doesn't need to be created.
+ f = None
+
+ if skips:
+ # skips come in ordered by key name, we need them in file order
+ skips.sort(key=lambda s: s.span[0])
+
+ # we need to skip a few erroneous blocks in the input, copy by hand
+ f = codecs.open(merge_file, 'wb', encoding)
+ offset = 0
+ for skip in skips:
+ chunk = skip.span
+ f.write(ctx.contents[offset:chunk[0]])
+ offset = chunk[1]
+ f.write(ctx.contents[offset:])
+
+ if f is None:
+ # l10n file is a good starting point
+ shutil.copyfile(l10n_file.fullpath, merge_file)
+
+ if not (capabilities & parser.CAN_MERGE):
+ if f:
+ f.close()
+ return
+
+ if skips or missing:
+ if f is None:
+ f = codecs.open(merge_file, 'ab', encoding)
+ trailing = (['\n'] +
+ [ref_entities[key].all for key in missing] +
+ [ref_entities[skip.key].all for skip in skips
+ if not isinstance(skip, parser.Junk)])
+
+ def ensureNewline(s):
+ if not s.endswith('\n'):
+ return s + '\n'
+ return s
+
+ print("adding to " + merge_file)
+ f.write(''.join(map(ensureNewline, trailing)))
+
+ if f is not None:
+ f.close()
+
+ def remove(self, ref_file, l10n, merge_file):
+ '''Obsolete l10n file.
+
+ Copy to merge stage if we can.
+ '''
+ self.observers.notify('obsoleteFile', l10n, None)
+ self.merge(
+ KeyedTuple([]), ref_file, l10n, merge_file,
+ [], [], None, parser.CAN_COPY, None
+ )
+
+ def compare(self, ref_file, l10n, merge_file, extra_tests=None):
+ try:
+ p = parser.getParser(ref_file.file)
+ except UserWarning:
+ # no comparison, XXX report?
+ # At least, merge
+ self.merge(
+ KeyedTuple([]), ref_file, l10n, merge_file, [], [], None,
+ parser.CAN_COPY, None)
+ return
+ try:
+ p.readFile(ref_file)
+ except Exception as e:
+ self.observers.notify('error', ref_file, str(e))
+ return
+ ref_entities = p.parse()
+ try:
+ p.readFile(l10n)
+ l10n_entities = p.parse()
+ l10n_ctx = p.ctx
+ except Exception as e:
+ self.observers.notify('error', l10n, str(e))
+ return
+
+ ar = AddRemove()
+ ar.set_left(ref_entities.keys())
+ ar.set_right(l10n_entities.keys())
+ report = missing = obsolete = changed = unchanged = keys = 0
+ missing_w = changed_w = unchanged_w = 0 # word stats
+ missings = []
+ skips = []
+ checker = getChecker(l10n, extra_tests=extra_tests)
+ if checker and checker.needs_reference:
+ checker.set_reference(ref_entities)
+ for msg in p.findDuplicates(ref_entities):
+ self.observers.notify('warning', l10n, msg)
+ for msg in p.findDuplicates(l10n_entities):
+ self.observers.notify('error', l10n, msg)
+ for action, entity_id in ar:
+ if action == 'delete':
+ # missing entity
+ if isinstance(ref_entities[entity_id], parser.Junk):
+ self.observers.notify(
+ 'warning', l10n, 'Parser error in en-US'
+ )
+ continue
+ _rv = self.observers.notify('missingEntity', l10n, entity_id)
+ if _rv == "ignore":
+ continue
+ if _rv == "error":
+ # only add to missing entities for l10n-merge on error,
+ # not report
+ missings.append(entity_id)
+ missing += 1
+ refent = ref_entities[entity_id]
+ missing_w += refent.count_words()
+ else:
+ # just report
+ report += 1
+ elif action == 'add':
+ # obsolete entity or junk
+ if isinstance(l10n_entities[entity_id],
+ parser.Junk):
+ junk = l10n_entities[entity_id]
+ self.observers.notify(
+ 'error', l10n,
+ junk.error_message()
+ )
+ if merge_file is not None:
+ skips.append(junk)
+ elif (
+ self.observers.notify('obsoleteEntity', l10n, entity_id)
+ != 'ignore'
+ ):
+ obsolete += 1
+ else:
+ # entity found in both ref and l10n, check for changed
+ refent = ref_entities[entity_id]
+ l10nent = l10n_entities[entity_id]
+ if self.keyRE.search(entity_id):
+ keys += 1
+ else:
+ if refent.equals(l10nent):
+ self.doUnchanged(l10nent)
+ unchanged += 1
+ unchanged_w += refent.count_words()
+ else:
+ self.doChanged(ref_file, refent, l10nent)
+ changed += 1
+ changed_w += refent.count_words()
+ # run checks:
+ if checker:
+ for tp, pos, msg, cat in checker.check(refent, l10nent):
+ if isinstance(pos, EntityPos):
+ line, col = l10nent.position(pos)
+ else:
+ line, col = l10nent.value_position(pos)
+ # skip error entities when merging
+ if tp == 'error' and merge_file is not None:
+ skips.append(l10nent)
+ self.observers.notify(
+ tp, l10n,
+ u"%s at line %d, column %d for %s" %
+ (msg, line, col, refent.key)
+ )
+ pass
+
+ if merge_file is not None:
+ self.merge(
+ ref_entities, ref_file,
+ l10n, merge_file, missings, skips, l10n_ctx,
+ p.capabilities, p.encoding)
+
+ stats = {
+ 'missing': missing,
+ 'missing_w': missing_w,
+ 'report': report,
+ 'obsolete': obsolete,
+ 'changed': changed,
+ 'changed_w': changed_w,
+ 'unchanged': unchanged,
+ 'unchanged_w': unchanged_w,
+ 'keys': keys,
+ }
+ self.observers.updateStats(l10n, stats)
+ pass
+
+ def add(self, orig, missing, merge_file):
+ ''' Add missing localized file.'''
+ f = orig
+ try:
+ p = parser.getParser(f.file)
+ except UserWarning:
+ p = None
+
+ # if we don't support this file, assume CAN_COPY to mimick
+ # l10n dir as closely as possible
+ caps = p.capabilities if p else parser.CAN_COPY
+ if (caps & (parser.CAN_COPY | parser.CAN_MERGE)):
+ # even if we can merge, pretend we can only copy
+ self.merge(
+ KeyedTuple([]), orig, missing, merge_file,
+ ['trigger copy'], [], None, parser.CAN_COPY, None
+ )
+
+ if self.observers.notify('missingFile', missing, None) == "ignore":
+ # filter said that we don't need this file, don't count it
+ return
+
+ if p is None:
+ # We don't have a parser, cannot count missing strings
+ return
+
+ try:
+ p.readFile(f)
+ entities = p.parse()
+ except Exception as ex:
+ self.observers.notify('error', f, str(ex))
+ return
+ # strip parse errors
+ entities = [e for e in entities if not isinstance(e, parser.Junk)]
+ self.observers.updateStats(missing, {'missing': len(entities)})
+ missing_w = 0
+ for e in entities:
+ missing_w += e.count_words()
+ self.observers.updateStats(missing, {'missing_w': missing_w})
+
+ def doUnchanged(self, entity):
+ # overload this if needed
+ pass
+
+ def doChanged(self, file, ref_entity, l10n_entity):
+ # overload this if needed
+ pass