summaryrefslogtreecommitdiffstats
path: root/third_party/python/compare_locales/compare_locales/merge.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/compare_locales/compare_locales/merge.py')
-rw-r--r--third_party/python/compare_locales/compare_locales/merge.py143
1 files changed, 143 insertions, 0 deletions
diff --git a/third_party/python/compare_locales/compare_locales/merge.py b/third_party/python/compare_locales/compare_locales/merge.py
new file mode 100644
index 0000000000..9399e639e0
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/merge.py
@@ -0,0 +1,143 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''Merge resources across channels.
+
+Merging resources is done over a series of parsed resources, or source
+strings.
+The nomenclature is that the resources are ordered from newest to oldest.
+The generated file structure is taken from the newest file, and then the
+next-newest, etc. The values of the returned entities are taken from the
+newest to the oldest resource, too.
+
+In merge_resources, there's an option to choose the values from oldest
+to newest instead.
+'''
+
+from collections import OrderedDict, defaultdict
+from codecs import encode
+import six
+
+
+from compare_locales import parser as cl
+from compare_locales.parser.base import StickyEntry
+from compare_locales.compare.utils import AddRemove
+
+
+class MergeNotSupportedError(ValueError):
+ pass
+
+
+def merge_channels(name, resources):
+ try:
+ parser = cl.getParser(name)
+ except UserWarning:
+ raise MergeNotSupportedError(
+ 'Unsupported file format ({}).'.format(name))
+
+ entities = merge_resources(parser, resources)
+ return encode(serialize_legacy_resource(entities), parser.encoding)
+
+
+def merge_resources(parser, resources, keep_newest=True):
+ '''Merge parsed or unparsed resources, returning a enumerable of Entities.
+
+ Resources are ordered from newest to oldest in the input. The structure
+ of the generated content is taken from the newest resource first, and
+ then filled by the next etc.
+ Values are also taken from the newest, unless keep_newest is False,
+ then values are taken from the oldest first.
+ '''
+
+ def parse_resource(resource):
+ # The counter dict keeps track of number of identical comments.
+ counter = defaultdict(int)
+ if isinstance(resource, bytes):
+ parser.readContents(resource)
+ resource = parser.walk()
+ pairs = [get_key_value(entity, counter) for entity in resource]
+ return OrderedDict(pairs)
+
+ def get_key_value(entity, counter):
+ if isinstance(entity, cl.Comment):
+ counter[entity.val] += 1
+ # Use the (value, index) tuple as the key. AddRemove will
+ # de-deplicate identical comments at the same index.
+ return ((entity.val, counter[entity.val]), entity)
+
+ if isinstance(entity, cl.Whitespace):
+ # Use the Whitespace instance as the key so that it's always
+ # unique. Adjecent whitespace will be folded into the longer one in
+ # prune.
+ return (entity, entity)
+
+ return (entity.key, entity)
+
+ entities = six.moves.reduce(
+ lambda x, y: merge_two(x, y, keep_newer=keep_newest),
+ map(parse_resource, resources))
+ return entities.values()
+
+
+def merge_two(newer, older, keep_newer=True):
+ '''Merge two OrderedDicts.
+
+ The order of the result dict is determined by `newer`.
+ The values in the dict are the newer ones by default, too.
+ If `keep_newer` is False, the values will be taken from the older
+ dict.
+ '''
+ diff = AddRemove()
+ diff.set_left(newer.keys())
+ diff.set_right(older.keys())
+
+ # Create a flat sequence of all entities in order reported by AddRemove.
+ get_entity = get_newer_entity if keep_newer else get_older_entity
+ contents = [(key, get_entity(newer, older, key)) for _, key in diff]
+
+ def prune(acc, cur):
+ _, entity = cur
+ if entity is None:
+ # Prune Nones which stand for duplicated comments.
+ return acc
+
+ if len(acc) and isinstance(entity, cl.Whitespace):
+ _, prev_entity = acc[-1]
+
+ if isinstance(prev_entity, cl.Whitespace):
+ # Prefer the longer whitespace.
+ if len(entity.all) > len(prev_entity.all):
+ acc[-1] = (entity, entity)
+ return acc
+
+ acc.append(cur)
+ return acc
+
+ pruned = six.moves.reduce(prune, contents, [])
+ return OrderedDict(pruned)
+
+
+def get_newer_entity(newer, older, key):
+ entity = newer.get(key, None)
+
+ # Always prefer the newer version.
+ if entity is not None:
+ return entity
+
+ return older.get(key)
+
+
+def get_older_entity(newer, older, key):
+ entity = older.get(key, None)
+
+ # If we don't have an older version, or it's a StickyEntry,
+ # get a newer version
+ if entity is None or isinstance(entity, StickyEntry):
+ return newer.get(key)
+
+ return entity
+
+
+def serialize_legacy_resource(entities):
+ return "".join((entity.all for entity in entities))