diff options
Diffstat (limited to 'third_party/python/compare_locales/compare_locales/merge.py')
-rw-r--r-- | third_party/python/compare_locales/compare_locales/merge.py | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/third_party/python/compare_locales/compare_locales/merge.py b/third_party/python/compare_locales/compare_locales/merge.py new file mode 100644 index 0000000000..9399e639e0 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/merge.py @@ -0,0 +1,143 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Merge resources across channels. + +Merging resources is done over a series of parsed resources, or source +strings. +The nomenclature is that the resources are ordered from newest to oldest. +The generated file structure is taken from the newest file, and then the +next-newest, etc. The values of the returned entities are taken from the +newest to the oldest resource, too. + +In merge_resources, there's an option to choose the values from oldest +to newest instead. +''' + +from collections import OrderedDict, defaultdict +from codecs import encode +import six + + +from compare_locales import parser as cl +from compare_locales.parser.base import StickyEntry +from compare_locales.compare.utils import AddRemove + + +class MergeNotSupportedError(ValueError): + pass + + +def merge_channels(name, resources): + try: + parser = cl.getParser(name) + except UserWarning: + raise MergeNotSupportedError( + 'Unsupported file format ({}).'.format(name)) + + entities = merge_resources(parser, resources) + return encode(serialize_legacy_resource(entities), parser.encoding) + + +def merge_resources(parser, resources, keep_newest=True): + '''Merge parsed or unparsed resources, returning a enumerable of Entities. + + Resources are ordered from newest to oldest in the input. The structure + of the generated content is taken from the newest resource first, and + then filled by the next etc. + Values are also taken from the newest, unless keep_newest is False, + then values are taken from the oldest first. + ''' + + def parse_resource(resource): + # The counter dict keeps track of number of identical comments. + counter = defaultdict(int) + if isinstance(resource, bytes): + parser.readContents(resource) + resource = parser.walk() + pairs = [get_key_value(entity, counter) for entity in resource] + return OrderedDict(pairs) + + def get_key_value(entity, counter): + if isinstance(entity, cl.Comment): + counter[entity.val] += 1 + # Use the (value, index) tuple as the key. AddRemove will + # de-deplicate identical comments at the same index. + return ((entity.val, counter[entity.val]), entity) + + if isinstance(entity, cl.Whitespace): + # Use the Whitespace instance as the key so that it's always + # unique. Adjecent whitespace will be folded into the longer one in + # prune. + return (entity, entity) + + return (entity.key, entity) + + entities = six.moves.reduce( + lambda x, y: merge_two(x, y, keep_newer=keep_newest), + map(parse_resource, resources)) + return entities.values() + + +def merge_two(newer, older, keep_newer=True): + '''Merge two OrderedDicts. + + The order of the result dict is determined by `newer`. + The values in the dict are the newer ones by default, too. + If `keep_newer` is False, the values will be taken from the older + dict. + ''' + diff = AddRemove() + diff.set_left(newer.keys()) + diff.set_right(older.keys()) + + # Create a flat sequence of all entities in order reported by AddRemove. + get_entity = get_newer_entity if keep_newer else get_older_entity + contents = [(key, get_entity(newer, older, key)) for _, key in diff] + + def prune(acc, cur): + _, entity = cur + if entity is None: + # Prune Nones which stand for duplicated comments. + return acc + + if len(acc) and isinstance(entity, cl.Whitespace): + _, prev_entity = acc[-1] + + if isinstance(prev_entity, cl.Whitespace): + # Prefer the longer whitespace. + if len(entity.all) > len(prev_entity.all): + acc[-1] = (entity, entity) + return acc + + acc.append(cur) + return acc + + pruned = six.moves.reduce(prune, contents, []) + return OrderedDict(pruned) + + +def get_newer_entity(newer, older, key): + entity = newer.get(key, None) + + # Always prefer the newer version. + if entity is not None: + return entity + + return older.get(key) + + +def get_older_entity(newer, older, key): + entity = older.get(key, None) + + # If we don't have an older version, or it's a StickyEntry, + # get a newer version + if entity is None or isinstance(entity, StickyEntry): + return newer.get(key) + + return entity + + +def serialize_legacy_resource(entities): + return "".join((entity.all for entity in entities)) |