summaryrefslogtreecommitdiffstats
path: root/third_party/python/fluent.migrate/fluent/migrate
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/fluent.migrate/fluent/migrate')
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/__init__.py3
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/_context.py329
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/blame.py80
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/changesets.py56
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/context.py148
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/errors.py22
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/evaluator.py28
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/helpers.py147
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/merge.py55
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/tool.py181
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/transforms.py576
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/util.py110
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/validator.py335
13 files changed, 2070 insertions, 0 deletions
diff --git a/third_party/python/fluent.migrate/fluent/migrate/__init__.py b/third_party/python/fluent.migrate/fluent/migrate/__init__.py
new file mode 100644
index 0000000000..96bb6008b2
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/__init__.py
@@ -0,0 +1,3 @@
+from .transforms import ( # noqa: F401
+ CONCAT, COPY, COPY_PATTERN, PLURALS, REPLACE, REPLACE_IN_TEXT
+)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/_context.py b/third_party/python/fluent.migrate/fluent/migrate/_context.py
new file mode 100644
index 0000000000..14c4de15e4
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/_context.py
@@ -0,0 +1,329 @@
+import os
+import codecs
+from functools import partial
+import logging
+from itertools import zip_longest
+
+import fluent.syntax.ast as FTL
+from fluent.syntax.parser import FluentParser
+from fluent.syntax.serializer import FluentSerializer
+from compare_locales.parser import getParser
+from compare_locales.plurals import get_plural
+
+from .evaluator import Evaluator
+from .merge import merge_resource
+from .errors import (
+ UnreadableReferenceError,
+)
+
+
+class InternalContext:
+ """Internal context for merging translation resources.
+
+ For the public interface, see `context.MigrationContext`.
+ """
+
+ def __init__(
+ self, lang, reference_dir, localization_dir, enforce_translated=False
+ ):
+ self.fluent_parser = FluentParser(with_spans=False)
+ self.fluent_serializer = FluentSerializer()
+
+ # An iterable of plural category names relevant to the context's
+ # language. E.g. ('one', 'other') for English.
+ self.plural_categories = get_plural(lang)
+ if self.plural_categories is None:
+ logger = logging.getLogger('migrate')
+ logger.warning(
+ 'Plural rule for "{}" is not defined in '
+ 'compare-locales'.format(lang))
+ self.plural_categories = ('one', 'other')
+
+ self.enforce_translated = enforce_translated
+ # Parsed input resources stored by resource path.
+ self.reference_resources = {}
+ self.localization_resources = {}
+ self.target_resources = {}
+
+ # An iterable of `FTL.Message` objects some of whose nodes can be the
+ # transform operations.
+ self.transforms = {}
+
+ # The evaluator instance is an AST transformer capable of walking an
+ # AST hierarchy and evaluating nodes which are migration Transforms.
+ self.evaluator = Evaluator(self)
+
+ def read_ftl_resource(self, path):
+ """Read an FTL resource and parse it into an AST."""
+ f = codecs.open(path, 'r', 'utf8')
+ try:
+ contents = f.read()
+ except UnicodeDecodeError as err:
+ logger = logging.getLogger('migrate')
+ logger.warning(f'Unable to read file {path}: {err}')
+ raise err
+ finally:
+ f.close()
+
+ ast = self.fluent_parser.parse(contents)
+
+ annots = [
+ annot
+ for entry in ast.body
+ if isinstance(entry, FTL.Junk)
+ for annot in entry.annotations
+ ]
+
+ if len(annots):
+ logger = logging.getLogger('migrate')
+ for annot in annots:
+ msg = annot.message
+ logger.warning(f'Syntax error in {path}: {msg}')
+
+ return ast
+
+ def read_legacy_resource(self, path):
+ """Read a legacy resource and parse it into a dict."""
+ parser = getParser(path)
+ parser.readFile(path)
+ # Transform the parsed result which is an iterator into a dict.
+ return {
+ entity.key: entity.val for entity in parser
+ if entity.localized or self.enforce_translated
+ }
+
+ def read_reference_ftl(self, path):
+ """Read and parse a reference FTL file.
+
+ A missing resource file is a fatal error and will raise an
+ UnreadableReferenceError.
+ """
+ fullpath = os.path.join(self.reference_dir, path)
+ try:
+ return self.read_ftl_resource(fullpath)
+ except OSError:
+ error_message = f'Missing reference file: {fullpath}'
+ logging.getLogger('migrate').error(error_message)
+ raise UnreadableReferenceError(error_message)
+ except UnicodeDecodeError as err:
+ error_message = f'Error reading file {fullpath}: {err}'
+ logging.getLogger('migrate').error(error_message)
+ raise UnreadableReferenceError(error_message)
+
+ def read_localization_ftl(self, path):
+ """Read and parse an existing localization FTL file.
+
+ Create a new FTL.Resource if the file doesn't exist or can't be
+ decoded.
+ """
+ fullpath = os.path.join(self.localization_dir, path)
+ try:
+ return self.read_ftl_resource(fullpath)
+ except OSError:
+ logger = logging.getLogger('migrate')
+ logger.info(
+ 'Localization file {} does not exist and '
+ 'it will be created'.format(path))
+ return FTL.Resource()
+ except UnicodeDecodeError:
+ logger = logging.getLogger('migrate')
+ logger.warning(
+ 'Localization file {} has broken encoding. '
+ 'It will be re-created and some translations '
+ 'may be lost'.format(path))
+ return FTL.Resource()
+
+ def maybe_add_localization(self, path):
+ """Add a localization resource to migrate translations from.
+
+ Uses a compare-locales parser to create a dict of (key, string value)
+ tuples.
+ For Fluent sources, we store the AST.
+ """
+ try:
+ fullpath = os.path.join(self.localization_dir, path)
+ if not fullpath.endswith('.ftl'):
+ collection = self.read_legacy_resource(fullpath)
+ else:
+ collection = self.read_ftl_resource(fullpath)
+ except OSError:
+ logger = logging.getLogger('migrate')
+ logger.warning(f'Missing localization file: {path}')
+ else:
+ self.localization_resources[path] = collection
+
+ def get_legacy_source(self, path, key):
+ """Get an entity value from a localized legacy source.
+
+ Used by the `Source` transform.
+ """
+ resource = self.localization_resources[path]
+ return resource.get(key, None)
+
+ def get_fluent_source_pattern(self, path, key):
+ """Get a pattern from a localized Fluent source.
+
+ If the key contains a `.`, does an attribute lookup.
+ Used by the `COPY_PATTERN` transform.
+ """
+ resource = self.localization_resources[path]
+ msg_key, _, attr_key = key.partition('.')
+ found = None
+ for entry in resource.body:
+ if isinstance(entry, (FTL.Message, FTL.Term)):
+ if entry.id.name == msg_key:
+ found = entry
+ break
+ if found is None:
+ return None
+ if not attr_key:
+ return found.value
+ for attribute in found.attributes:
+ if attribute.id.name == attr_key:
+ return attribute.value
+ return None
+
+ def messages_equal(self, res1, res2):
+ """Compare messages and terms of two FTL resources.
+
+ Uses FTL.BaseNode.equals to compare all messages/terms
+ in two FTL resources.
+ If the order or number of messages differ, the result is also False.
+ """
+ def message_id(message):
+ "Return the message's identifer name for sorting purposes."
+ return message.id.name
+
+ messages1 = sorted(
+ (entry for entry in res1.body
+ if isinstance(entry, FTL.Message)
+ or isinstance(entry, FTL.Term)),
+ key=message_id)
+ messages2 = sorted(
+ (entry for entry in res2.body
+ if isinstance(entry, FTL.Message)
+ or isinstance(entry, FTL.Term)),
+ key=message_id)
+ for msg1, msg2 in zip_longest(messages1, messages2):
+ if msg1 is None or msg2 is None:
+ return False
+ if not msg1.equals(msg2):
+ return False
+ return True
+
+ def merge_changeset(self, changeset=None, known_translations=None):
+ """Return a generator of FTL ASTs for the changeset.
+
+ The input data must be configured earlier using the `add_*` methods.
+ if given, `changeset` must be a set of (path, key) tuples describing
+ which legacy translations are to be merged. If `changeset` is None,
+ all legacy translations will be allowed to be migrated in a single
+ changeset.
+
+ We use the `in_changeset` method to determine if a message should be
+ migrated for the given changeset.
+
+ Given `changeset`, return a dict whose keys are resource paths and
+ values are `FTL.Resource` instances. The values will also be used to
+ update this context's existing localization resources.
+ """
+
+ if changeset is None:
+ # Merge all known legacy translations. Used in tests.
+ changeset = {
+ (path, key)
+ for path, strings in self.localization_resources.items()
+ if not path.endswith('.ftl')
+ for key in strings.keys()
+ }
+
+ if known_translations is None:
+ known_translations = changeset
+
+ for path, reference in self.reference_resources.items():
+ current = self.target_resources[path]
+ transforms = self.transforms.get(path, [])
+ in_changeset = partial(
+ self.in_changeset, changeset, known_translations, path)
+
+ # Merge legacy translations with the existing ones using the
+ # reference as a template.
+ snapshot = merge_resource(
+ self, reference, current, transforms, in_changeset
+ )
+
+ # Skip this path if the messages in the merged snapshot are
+ # identical to those in the current state of the localization file.
+ # This may happen when:
+ #
+ # - none of the transforms is in the changset, or
+ # - all messages which would be migrated by the context's
+ # transforms already exist in the current state.
+ if self.messages_equal(current, snapshot):
+ continue
+
+ # Store the merged snapshot on the context so that the next merge
+ # already takes it into account as the existing localization.
+ self.target_resources[path] = snapshot
+
+ # The result for this path is a complete `FTL.Resource`.
+ yield path, snapshot
+
+ def in_changeset(self, changeset, known_translations, path, ident):
+ """Check if a message should be migrated in this changeset.
+
+ The message is identified by path and ident.
+
+
+ A message will be migrated only if all of its dependencies
+ are present in the currently processed changeset.
+
+ If a transform defined for this message points to a missing
+ legacy translation, this message will not be merged. The
+ missing legacy dependency won't be present in the changeset.
+
+ This also means that partially translated messages (e.g.
+ constructed from two legacy strings out of which only one is
+ avaiable) will never be migrated.
+ """
+ message_deps = self.dependencies.get((path, ident), None)
+
+ # Don't merge if we don't have a transform for this message.
+ if message_deps is None:
+ return False
+
+ # As a special case, if a transform exists but has no
+ # dependecies, it's a hardcoded `FTL.Node` which doesn't
+ # migrate any existing translation but rather creates a new
+ # one. Merge it.
+ if len(message_deps) == 0:
+ return True
+
+ # Make sure all the dependencies are present in the current
+ # changeset. Partial migrations are not currently supported.
+ # See https://bugzilla.mozilla.org/show_bug.cgi?id=1321271
+ # We only return True if our current changeset touches
+ # the transform, and we have all of the dependencies.
+ active_deps = message_deps & changeset
+ available_deps = message_deps & known_translations
+ return active_deps and message_deps == available_deps
+
+ def serialize_changeset(self, changeset, known_translations=None):
+ """Return a dict of serialized FTLs for the changeset.
+
+ Given `changeset`, return a dict whose keys are resource paths and
+ values are serialized FTL snapshots.
+ """
+
+ return {
+ path: self.fluent_serializer.serialize(snapshot)
+ for path, snapshot in self.merge_changeset(
+ changeset, known_translations
+ )
+ }
+
+ def evaluate(self, node):
+ return self.evaluator.visit(node)
+
+
+logging.basicConfig()
diff --git a/third_party/python/fluent.migrate/fluent/migrate/blame.py b/third_party/python/fluent.migrate/fluent/migrate/blame.py
new file mode 100644
index 0000000000..2ae23d0ebe
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/blame.py
@@ -0,0 +1,80 @@
+import argparse
+import json
+import os
+
+from compare_locales.parser import getParser, Junk
+from compare_locales.parser.fluent import FluentEntity
+from compare_locales import mozpath
+import hglib
+from hglib.util import b, cmdbuilder
+
+
+class Blame:
+ def __init__(self, client):
+ self.client = client
+ self.users = []
+ self.blame = {}
+
+ def attribution(self, file_paths):
+ args = cmdbuilder(
+ b('annotate'), *[b(p) for p in file_paths], template='json',
+ date=True, user=True, cwd=self.client.root())
+ blame_json = self.client.rawcommand(args)
+ file_blames = json.loads(blame_json)
+
+ for file_blame in file_blames:
+ self.handleFile(file_blame)
+
+ return {'authors': self.users,
+ 'blame': self.blame}
+
+ def handleFile(self, file_blame):
+ path = mozpath.normsep(file_blame['path'])
+
+ try:
+ parser = getParser(path)
+ except UserWarning:
+ return
+
+ self.blame[path] = {}
+
+ self.readFile(parser, path)
+ entities = parser.parse()
+ for e in entities:
+ if isinstance(e, Junk):
+ continue
+ if e.val_span:
+ key_vals = [(e.key, e.val_span)]
+ else:
+ key_vals = []
+ if isinstance(e, FluentEntity):
+ key_vals += [
+ (f'{e.key}.{attr.key}', attr.val_span)
+ for attr in e.attributes
+ ]
+ for key, (val_start, val_end) in key_vals:
+ entity_lines = file_blame['lines'][
+ (e.ctx.linecol(val_start)[0] - 1):e.ctx.linecol(val_end)[0]
+ ]
+ # ignore timezone
+ entity_lines.sort(key=lambda blame: -blame['date'][0])
+ line_blame = entity_lines[0]
+ user = line_blame['user']
+ timestamp = line_blame['date'][0] # ignore timezone
+ if user not in self.users:
+ self.users.append(user)
+ userid = self.users.index(user)
+ self.blame[path][key] = [userid, timestamp]
+
+ def readFile(self, parser, path):
+ parser.readFile(os.path.join(self.client.root().decode('utf-8'), path))
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('repo_path')
+ parser.add_argument('file_path', nargs='+')
+ args = parser.parse_args()
+ blame = Blame(hglib.open(args.repo_path))
+ attrib = blame.attribution(args.file_path)
+ print(json.dumps(attrib, indent=4, separators=(',', ': ')))
diff --git a/third_party/python/fluent.migrate/fluent/migrate/changesets.py b/third_party/python/fluent.migrate/fluent/migrate/changesets.py
new file mode 100644
index 0000000000..c766216aa2
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/changesets.py
@@ -0,0 +1,56 @@
+import time
+
+
+def by_first_commit(item):
+ """Order two changesets by their first commit date."""
+ return item['first_commit']
+
+
+def convert_blame_to_changesets(blame_json):
+ """Convert a blame dict into a list of changesets.
+
+ The blame information in `blame_json` should be a dict of the following
+ structure:
+
+ {
+ 'authors': [
+ 'A.N. Author <author@example.com>',
+ ],
+ 'blame': {
+ 'path/one': {
+ 'key1': [0, 1346095921.0],
+ },
+ }
+ }
+
+ It will be transformed into a list of changesets which can be fed into
+ `InternalContext.serialize_changeset`:
+
+ [
+ {
+ 'author': 'A.N. Author <author@example.com>',
+ 'first_commit': 1346095921.0,
+ 'changes': {
+ ('path/one', 'key1'),
+ }
+ },
+ ]
+
+ """
+ now = time.time()
+ changesets = [
+ {
+ 'author': author,
+ 'first_commit': now,
+ 'changes': set()
+ } for author in blame_json['authors']
+ ]
+
+ for path, keys_info in blame_json['blame'].items():
+ for key, (author_index, timestamp) in keys_info.items():
+ changeset = changesets[author_index]
+ changeset['changes'].add((path, key))
+ if timestamp < changeset['first_commit']:
+ changeset['first_commit'] = timestamp
+
+ return sorted(changesets, key=by_first_commit)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/context.py b/third_party/python/fluent.migrate/fluent/migrate/context.py
new file mode 100644
index 0000000000..befeea7ab0
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/context.py
@@ -0,0 +1,148 @@
+import logging
+
+import fluent.syntax.ast as FTL
+from fluent.migrate.util import fold
+
+from .transforms import Source
+from .util import get_message, skeleton
+from .errors import (
+ EmptyLocalizationError,
+ UnreadableReferenceError,
+)
+from ._context import InternalContext
+
+
+__all__ = [
+ 'EmptyLocalizationError',
+ 'UnreadableReferenceError',
+ 'MigrationContext',
+]
+
+
+class MigrationContext(InternalContext):
+ """Stateful context for merging translation resources.
+
+ `MigrationContext` must be configured with the target locale and the
+ directory locations of the input data.
+
+ The transformation takes four types of input data:
+
+ - The en-US FTL reference files which will be used as templates for
+ message order, comments and sections. If the reference_dir is None,
+ the migration will create Messages and Terms in the order given by
+ the transforms.
+
+ - The current FTL files for the given locale.
+
+ - A list of `FTL.Message` or `FTL.Term` objects some of whose nodes
+ are special helper or transform nodes:
+
+ helpers: VARIABLE_REFERENCE, MESSAGE_REFERENCE, TERM_REFERENCE
+ transforms: COPY, REPLACE_IN_TEXT, REPLACE, PLURALS, CONCAT
+ fluent value helper: COPY_PATTERN
+
+ The legacy (DTD, properties) translation files are deduced by the
+ dependencies in the transforms. The translations from these files will be
+ read from the localization_dir and transformed into FTL and merged
+ into the existing FTL files for the given language.
+ """
+
+ def __init__(
+ self, locale, reference_dir, localization_dir, enforce_translated=False
+ ):
+ super().__init__(
+ locale, reference_dir, localization_dir,
+ enforce_translated=enforce_translated
+ )
+ self.locale = locale
+ # Paths to directories with input data, relative to CWD.
+ self.reference_dir = reference_dir
+ self.localization_dir = localization_dir
+
+ # A dict whose keys are `(path, key)` tuples corresponding to target
+ # FTL translations, and values are sets of `(path, key)` tuples
+ # corresponding to localized entities which will be migrated.
+ self.dependencies = {}
+
+ def add_transforms(self, target, reference, transforms):
+ """Define transforms for target using reference as template.
+
+ `target` is a path of the destination FTL file relative to the
+ localization directory. `reference` is a path to the template FTL
+ file relative to the reference directory.
+
+ Each transform is an extended FTL node with `Transform` nodes as some
+ values. Transforms are stored in their lazy AST form until
+ `merge_changeset` is called, at which point they are evaluated to real
+ FTL nodes with migrated translations.
+
+ Each transform is scanned for `Source` nodes which will be used to
+ build the list of dependencies for the transformed message.
+
+ For transforms that merely copy legacy messages or Fluent patterns,
+ using `fluent.migrate.helpers.transforms_from` is recommended.
+ """
+ def get_sources(acc, cur):
+ if isinstance(cur, Source):
+ acc.add((cur.path, cur.key))
+ return acc
+
+ if self.reference_dir is None:
+ # Add skeletons to resource body for each transform
+ # if there's no reference.
+ reference_ast = self.reference_resources.get(target)
+ if reference_ast is None:
+ reference_ast = FTL.Resource()
+ reference_ast.body.extend(
+ skeleton(transform) for transform in transforms
+ )
+ else:
+ reference_ast = self.read_reference_ftl(reference)
+ self.reference_resources[target] = reference_ast
+
+ for node in transforms:
+ ident = node.id.name
+ # Scan `node` for `Source` nodes and collect the information they
+ # store into a set of dependencies.
+ dependencies = fold(get_sources, node, set())
+ # Set these sources as dependencies for the current transform.
+ self.dependencies[(target, ident)] = dependencies
+
+ # The target Fluent message should exist in the reference file. If
+ # it doesn't, it's probably a typo.
+ # Of course, only if we're having a reference.
+ if self.reference_dir is None:
+ continue
+ if get_message(reference_ast.body, ident) is None:
+ logger = logging.getLogger('migrate')
+ logger.warning(
+ '{} "{}" was not found in {}'.format(
+ type(node).__name__, ident, reference))
+
+ # Keep track of localization resource paths which were defined as
+ # sources in the transforms.
+ expected_paths = set()
+
+ # Read all legacy translation files defined in Source transforms. This
+ # may fail but a single missing legacy resource doesn't mean that the
+ # migration can't succeed.
+ for dependencies in self.dependencies.values():
+ for path in {path for path, _ in dependencies}:
+ expected_paths.add(path)
+ self.maybe_add_localization(path)
+
+ # However, if all legacy resources are missing, bail out early. There
+ # are no translations to migrate. We'd also get errors in hg annotate.
+ if len(expected_paths) > 0 and len(self.localization_resources) == 0:
+ error_message = 'No localization files were found'
+ logging.getLogger('migrate').error(error_message)
+ raise EmptyLocalizationError(error_message)
+
+ # Add the current transforms to any other transforms added earlier for
+ # this path.
+ path_transforms = self.transforms.setdefault(target, [])
+ path_transforms += transforms
+
+ if target not in self.target_resources:
+ target_ast = self.read_localization_ftl(target)
+ self.target_resources[target] = target_ast
diff --git a/third_party/python/fluent.migrate/fluent/migrate/errors.py b/third_party/python/fluent.migrate/fluent/migrate/errors.py
new file mode 100644
index 0000000000..dcc3025377
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/errors.py
@@ -0,0 +1,22 @@
+class SkipTransform(RuntimeError):
+ pass
+
+
+class MigrationError(ValueError):
+ pass
+
+
+class EmptyLocalizationError(MigrationError):
+ pass
+
+
+class NotSupportedError(MigrationError):
+ pass
+
+
+class UnreadableReferenceError(MigrationError):
+ pass
+
+
+class InvalidTransformError(MigrationError):
+ pass
diff --git a/third_party/python/fluent.migrate/fluent/migrate/evaluator.py b/third_party/python/fluent.migrate/fluent/migrate/evaluator.py
new file mode 100644
index 0000000000..90c626f933
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/evaluator.py
@@ -0,0 +1,28 @@
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Transformer
+
+from .transforms import Transform
+
+
+class Evaluator(Transformer):
+ """An AST transformer for evaluating migration Transforms.
+
+ An AST transformer (i.e. a visitor capable of modifying the AST) which
+ walks an AST hierarchy and evaluates nodes which are migration Transforms.
+ """
+
+ def __init__(self, ctx):
+ self.ctx = ctx
+
+ def visit(self, node):
+ if not isinstance(node, FTL.BaseNode):
+ return node
+
+ if isinstance(node, Transform):
+ # Some transforms don't expect other transforms as children.
+ # Evaluate the children first.
+ transform = self.generic_visit(node)
+ # Then, evaluate this transform.
+ return transform(self.ctx)
+
+ return self.generic_visit(node)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/helpers.py b/third_party/python/fluent.migrate/fluent/migrate/helpers.py
new file mode 100644
index 0000000000..1c12f644fc
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/helpers.py
@@ -0,0 +1,147 @@
+"""Fluent AST helpers.
+
+The functions defined in this module offer a shorthand for defining common AST
+nodes.
+
+They take a string argument and immediately return a corresponding AST node.
+(As opposed to Transforms which are AST nodes on their own and only return the
+migrated AST nodes when they are evaluated by a MigrationContext.) """
+
+
+from fluent.syntax import FluentParser, ast as FTL
+from fluent.syntax.visitor import Transformer
+from .transforms import Transform, CONCAT, COPY, COPY_PATTERN
+from .errors import NotSupportedError, InvalidTransformError
+
+
+def VARIABLE_REFERENCE(name):
+ """Create an ExternalArgument expression."""
+
+ return FTL.VariableReference(
+ id=FTL.Identifier(name)
+ )
+
+
+def MESSAGE_REFERENCE(name):
+ """Create a MessageReference expression.
+
+ If the passed name contains a `.`, we're generating
+ a message reference with an attribute.
+ """
+ if '.' in name:
+ name, attribute = name.split('.')
+ attribute = FTL.Identifier(attribute)
+ else:
+ attribute = None
+
+ return FTL.MessageReference(
+ id=FTL.Identifier(name),
+ attribute=attribute,
+ )
+
+
+def TERM_REFERENCE(name):
+ """Create a TermReference expression."""
+
+ return FTL.TermReference(
+ id=FTL.Identifier(name)
+ )
+
+
+class IntoTranforms(Transformer):
+ IMPLICIT_TRANSFORMS = ("CONCAT",)
+ FORBIDDEN_TRANSFORMS = ("PLURALS", "REPLACE", "REPLACE_IN_TEXT")
+
+ def __init__(self, substitutions):
+ self.substitutions = substitutions
+
+ def visit_Junk(self, node):
+ anno = node.annotations[0]
+ raise InvalidTransformError(
+ "Transform contains parse error: {}, at {}".format(
+ anno.message, anno.span.start))
+
+ def visit_FunctionReference(self, node):
+ name = node.id.name
+ if name in self.IMPLICIT_TRANSFORMS:
+ raise NotSupportedError(
+ "{} may not be used with transforms_from(). It runs "
+ "implicitly on all Patterns anyways.".format(name))
+ if name in self.FORBIDDEN_TRANSFORMS:
+ raise NotSupportedError(
+ "{} may not be used with transforms_from(). It requires "
+ "additional logic in Python code.".format(name))
+ if name in ('COPY', 'COPY_PATTERN'):
+ args = (
+ self.into_argument(arg) for arg in node.arguments.positional
+ )
+ kwargs = {
+ arg.name.name: self.into_argument(arg.value)
+ for arg in node.arguments.named}
+ if name == 'COPY':
+ return COPY(*args, **kwargs)
+ return COPY_PATTERN(*args, **kwargs)
+ return self.generic_visit(node)
+
+ def visit_Placeable(self, node):
+ """If the expression is a Transform, replace this Placeable
+ with the Transform it's holding.
+ Transforms evaluate to Patterns, which are flattened as
+ elements of Patterns in Transform.pattern_of, but only
+ one level deep.
+ """
+ node = self.generic_visit(node)
+ if isinstance(node.expression, Transform):
+ return node.expression
+ return node
+
+ def visit_Pattern(self, node):
+ """Replace the Pattern with CONCAT which is more accepting of its
+ elements. CONCAT takes PatternElements, Expressions and other
+ Patterns (e.g. returned from evaluating transforms).
+ """
+ node = self.generic_visit(node)
+ return CONCAT(*node.elements)
+
+ def into_argument(self, node):
+ """Convert AST node into an argument to migration transforms."""
+ if isinstance(node, FTL.StringLiteral):
+ # Special cases for booleans which don't exist in Fluent.
+ if node.value == "True":
+ return True
+ if node.value == "False":
+ return False
+ return node.value
+ if isinstance(node, FTL.MessageReference):
+ try:
+ return self.substitutions[node.id.name]
+ except KeyError:
+ raise InvalidTransformError(
+ "Unknown substitution in COPY: {}".format(
+ node.id.name))
+ else:
+ raise InvalidTransformError(
+ "Invalid argument passed to COPY: {}".format(
+ type(node).__name__))
+
+
+def transforms_from(ftl, **substitutions):
+ """Parse FTL code into a list of Message nodes with Transforms.
+
+ The FTL may use a fabricated COPY function inside of placeables which
+ will be converted into actual COPY migration transform.
+
+ new-key = Hardcoded text { COPY("filepath.dtd", "string.key") }
+
+ For convenience, COPY may also refer to transforms_from's keyword
+ arguments via the MessageReference syntax:
+
+ transforms_from(\"""
+ new-key = Hardcoded text { COPY(file_dtd, "string.key") }
+ \""", file_dtd="very/long/path/to/a/file.dtd")
+
+ """
+
+ parser = FluentParser(with_spans=False)
+ resource = parser.parse(ftl)
+ return IntoTranforms(substitutions).visit(resource).body
diff --git a/third_party/python/fluent.migrate/fluent/migrate/merge.py b/third_party/python/fluent.migrate/fluent/migrate/merge.py
new file mode 100644
index 0000000000..c8bedc4583
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/merge.py
@@ -0,0 +1,55 @@
+import fluent.syntax.ast as FTL
+
+from .errors import SkipTransform
+from .util import get_message, get_transform
+
+
+def merge_resource(ctx, reference, current, transforms, in_changeset):
+ """Transform legacy translations into FTL.
+
+ Use the `reference` FTL AST as a template. For each en-US string in the
+ reference, first check for an existing translation in the current FTL
+ `localization` and use it if it's present; then if the string has
+ a transform defined in the migration specification and if it's in the
+ currently processed changeset, evaluate the transform.
+ """
+
+ def merge_body(body):
+ return [
+ entry
+ for entry in map(merge_entry, body)
+ if entry is not None
+ ]
+
+ def merge_entry(entry):
+ # All standalone comments will be merged.
+ if isinstance(entry, FTL.BaseComment):
+ return entry
+
+ # Ignore Junk
+ if isinstance(entry, FTL.Junk):
+ return None
+
+ ident = entry.id.name
+
+ # If the message is present in the existing localization, we add it to
+ # the resulting resource. This ensures consecutive merges don't remove
+ # translations but rather create supersets of them.
+ existing = get_message(current.body, ident)
+ if existing is not None:
+ return existing
+
+ transform = get_transform(transforms, ident)
+
+ # Make sure this message is supposed to be migrated as part of the
+ # current changeset.
+ if transform is not None and in_changeset(ident):
+ if transform.comment is None:
+ transform.comment = entry.comment
+ try:
+ return ctx.evaluate(transform)
+ except SkipTransform:
+ return None
+
+ body = merge_body(reference.body)
+ return FTL.Resource(body)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/tool.py b/third_party/python/fluent.migrate/fluent/migrate/tool.py
new file mode 100644
index 0000000000..cb5ddb23b4
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/tool.py
@@ -0,0 +1,181 @@
+import os
+import logging
+import argparse
+from contextlib import contextmanager
+import importlib
+import sys
+
+import hglib
+
+from fluent.migrate.context import MigrationContext
+from fluent.migrate.errors import MigrationError
+from fluent.migrate.changesets import convert_blame_to_changesets
+from fluent.migrate.blame import Blame
+
+
+@contextmanager
+def dont_write_bytecode():
+ _dont_write_bytecode = sys.dont_write_bytecode
+ sys.dont_write_bytecode = True
+ yield
+ sys.dont_write_bytecode = _dont_write_bytecode
+
+
+class Migrator:
+ def __init__(self, locale, reference_dir, localization_dir, dry_run):
+ self.locale = locale
+ self.reference_dir = reference_dir
+ self.localization_dir = localization_dir
+ self.dry_run = dry_run
+ self._client = None
+
+ @property
+ def client(self):
+ if self._client is None:
+ self._client = hglib.open(self.localization_dir, 'utf-8')
+ return self._client
+
+ def close(self):
+ # close hglib.client, if we cached one.
+ if self._client is not None:
+ self._client.close()
+
+ def run(self, migration):
+ print('\nRunning migration {} for {}'.format(
+ migration.__name__, self.locale))
+
+ # For each migration create a new context.
+ ctx = MigrationContext(
+ self.locale, self.reference_dir, self.localization_dir
+ )
+
+ try:
+ # Add the migration spec.
+ migration.migrate(ctx)
+ except MigrationError as e:
+ print(' Skipping migration {} for {}:\n {}'.format(
+ migration.__name__, self.locale, e))
+ return
+
+ # Keep track of how many changesets we're committing.
+ index = 0
+ description_template = migration.migrate.__doc__
+
+ # Annotate localization files used as sources by this migration
+ # to preserve attribution of translations.
+ files = ctx.localization_resources.keys()
+ blame = Blame(self.client).attribution(files)
+ changesets = convert_blame_to_changesets(blame)
+ known_legacy_translations = set()
+
+ for changeset in changesets:
+ snapshot = self.snapshot(
+ ctx, changeset['changes'], known_legacy_translations
+ )
+ if not snapshot:
+ continue
+ self.serialize_changeset(snapshot)
+ index += 1
+ self.commit_changeset(
+ description_template, changeset['author'], index
+ )
+
+ def snapshot(self, ctx, changes_in_changeset, known_legacy_translations):
+ '''Run the migration for the changeset, with the set of
+ this and all prior legacy translations.
+ '''
+ known_legacy_translations.update(changes_in_changeset)
+ return ctx.serialize_changeset(
+ changes_in_changeset,
+ known_legacy_translations
+ )
+
+ def serialize_changeset(self, snapshot):
+ '''Write serialized FTL files to disk.'''
+ for path, content in snapshot.items():
+ fullpath = os.path.join(self.localization_dir, path)
+ print(f' Writing to {fullpath}')
+ if not self.dry_run:
+ fulldir = os.path.dirname(fullpath)
+ if not os.path.isdir(fulldir):
+ os.makedirs(fulldir)
+ with open(fullpath, 'wb') as f:
+ f.write(content.encode('utf8'))
+ f.close()
+
+ def commit_changeset(
+ self, description_template, author, index
+ ):
+ message = description_template.format(
+ index=index,
+ author=author
+ )
+
+ print(f' Committing changeset: {message}')
+ if self.dry_run:
+ return
+ try:
+ self.client.commit(
+ message, user=author.encode('utf-8'), addremove=True
+ )
+ except hglib.error.CommandError as err:
+ print(f' WARNING: hg commit failed ({err})')
+
+
+def main(locale, reference_dir, localization_dir, migrations, dry_run):
+ """Run migrations and commit files with the result."""
+ migrator = Migrator(locale, reference_dir, localization_dir, dry_run)
+
+ for migration in migrations:
+ migrator.run(migration)
+
+ migrator.close()
+
+
+def cli():
+ parser = argparse.ArgumentParser(
+ description='Migrate translations to FTL.'
+ )
+ parser.add_argument(
+ 'migrations', metavar='MIGRATION', type=str, nargs='+',
+ help='migrations to run (Python modules)'
+ )
+ parser.add_argument(
+ '--locale', '--lang', type=str,
+ help='target locale code (--lang is deprecated)'
+ )
+ parser.add_argument(
+ '--reference-dir', type=str,
+ help='directory with reference FTL files'
+ )
+ parser.add_argument(
+ '--localization-dir', type=str,
+ help='directory for localization files'
+ )
+ parser.add_argument(
+ '--dry-run', action='store_true',
+ help='do not write to disk nor commit any changes'
+ )
+ parser.set_defaults(dry_run=False)
+
+ logger = logging.getLogger('migrate')
+ logger.setLevel(logging.INFO)
+
+ args = parser.parse_args()
+
+ # Don't byte-compile migrations.
+ # They're not our code, and infrequently run
+ with dont_write_bytecode():
+ migrations = map(importlib.import_module, args.migrations)
+
+ main(
+ locale=args.locale,
+ reference_dir=args.reference_dir,
+ localization_dir=args.localization_dir,
+ migrations=migrations,
+ dry_run=args.dry_run
+ )
+
+
+if __name__ == '__main__':
+ cli()
diff --git a/third_party/python/fluent.migrate/fluent/migrate/transforms.py b/third_party/python/fluent.migrate/fluent/migrate/transforms.py
new file mode 100644
index 0000000000..11b722125f
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/transforms.py
@@ -0,0 +1,576 @@
+"""Migration Transforms.
+
+Transforms are AST nodes which describe how legacy translations should be
+migrated. They are created inert and only return the migrated AST nodes when
+they are evaluated by a MigrationContext.
+
+All Transforms evaluate to Fluent Patterns. This makes them suitable for
+defining migrations of values of message, attributes and variants. The special
+CONCAT Transform is capable of joining multiple Patterns returned by evaluating
+other Transforms into a single Pattern. It can also concatenate Pattern
+elements: TextElements and Placeables.
+
+The COPY, REPLACE and PLURALS Transforms inherit from Source which is a special
+AST Node defining the location (the file path and the id) of the legacy
+translation. During the migration, the current MigrationContext scans the
+migration spec for Source nodes and extracts the information about all legacy
+translations being migrated. For instance,
+
+ COPY('file.dtd', 'hello')
+
+is equivalent to:
+
+ FTL.Pattern([
+ Source('file.dtd', 'hello')
+ ])
+
+Sometimes it's useful to work with text rather than (path, key) source
+definitions. This is the case when the migrated translation requires some
+hardcoded text, e.g. <a> and </a> when multiple translations become a single
+one with a DOM overlay. In such cases it's best to use FTL.TextElements:
+
+ FTL.Message(
+ id=FTL.Identifier('update-failed'),
+ value=CONCAT(
+ COPY('aboutDialog.dtd', 'update.failed.start'),
+ FTL.TextElement('<a>'),
+ COPY('aboutDialog.dtd', 'update.failed.linkText'),
+ FTL.TextElement('</a>'),
+ COPY('aboutDialog.dtd', 'update.failed.end'),
+ )
+ )
+
+The REPLACE_IN_TEXT Transform also takes TextElements as input, making it
+possible to pass it as the foreach function of the PLURALS Transform. In the
+example below, each slice of the plural string is converted into a
+TextElement by PLURALS and then run through the REPLACE_IN_TEXT transform.
+
+ FTL.Message(
+ FTL.Identifier('delete-all'),
+ value=PLURALS(
+ 'aboutDownloads.dtd',
+ 'deleteAll',
+ VARIABLE_REFERENCE('num'),
+ lambda text: REPLACE_IN_TEXT(
+ text,
+ {
+ '#1': VARIABLE_REFERENCE('num')
+ }
+ )
+ )
+ )
+"""
+
+import re
+
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Transformer
+from .errors import NotSupportedError
+
+
+def chain_elements(elements):
+ '''Flatten a list of FTL nodes into an iterator over PatternElements.'''
+ for element in elements:
+ if isinstance(element, FTL.Pattern):
+ # PY3 yield from element.elements
+ yield from element.elements
+ elif isinstance(element, FTL.PatternElement):
+ yield element
+ elif isinstance(element, FTL.Expression):
+ yield FTL.Placeable(element)
+ else:
+ raise RuntimeError(
+ 'Expected Pattern, PatternElement or Expression')
+
+
+re_leading_ws = re.compile(
+ r'\A(?:(?P<whitespace> +)(?P<text>.*?)|(?P<block_text>\n.*?))\Z',
+ re.S,
+)
+re_trailing_ws = re.compile(
+ r'\A(?:(?P<text>.*?)(?P<whitespace> +)|(?P<block_text>.*\n))\Z',
+ re.S
+)
+
+
+def extract_whitespace(regex, element):
+ '''Extract leading or trailing whitespace from a TextElement.
+
+ Return a tuple of (Placeable, TextElement) in which the Placeable
+ encodes the extracted whitespace as a StringLiteral and the
+ TextElement has the same amount of whitespace removed. The
+ Placeable with the extracted whitespace is always returned first.
+ If the element starts or ends with a newline, add an empty
+ StringLiteral.
+ '''
+ match = re.search(regex, element.value)
+ if match:
+ # If white-space is None, we're a newline. Add an
+ # empty { "" }
+ whitespace = match.group('whitespace') or ''
+ placeable = FTL.Placeable(FTL.StringLiteral(whitespace))
+ if whitespace == element.value:
+ return placeable, None
+ else:
+ # Either text or block_text matched the rest.
+ text = match.group('text') or match.group('block_text')
+ return placeable, FTL.TextElement(text)
+ else:
+ return None, element
+
+
+class Transform(FTL.BaseNode):
+ def __call__(self, ctx):
+ raise NotImplementedError
+
+ @staticmethod
+ def pattern_of(*elements):
+ normalized = []
+
+ # Normalize text content: convert text content to TextElements, join
+ # adjacent text and prune empty. Text content is either existing
+ # TextElements or whitespace-only StringLiterals. This may result in
+ # leading and trailing whitespace being put back into TextElements if
+ # the new Pattern is built from existing Patterns (CONCAT(COPY...)).
+ # The leading and trailing whitespace of the new Pattern will be
+ # extracted later into new StringLiterals.
+ for element in chain_elements(elements):
+ if isinstance(element, FTL.TextElement):
+ text_content = element.value
+ elif isinstance(element, FTL.Placeable) \
+ and isinstance(element.expression, FTL.StringLiteral) \
+ and re.match(r'^ *$', element.expression.value):
+ text_content = element.expression.value
+ else:
+ # The element does not contain text content which should be
+ # normalized. It may be a number, a reference, or
+ # a StringLiteral which should be preserved in the Pattern.
+ normalized.append(element)
+ continue
+
+ previous = normalized[-1] if len(normalized) else None
+ if isinstance(previous, FTL.TextElement):
+ # Join adjacent TextElements.
+ previous.value += text_content
+ elif len(text_content) > 0:
+ # Normalize non-empty text to a TextElement.
+ normalized.append(FTL.TextElement(text_content))
+ else:
+ # Prune empty text.
+ pass
+
+ # Store empty values explicitly as {""}.
+ if len(normalized) == 0:
+ empty = FTL.Placeable(FTL.StringLiteral(''))
+ return FTL.Pattern([empty])
+
+ # Extract explicit leading whitespace into a StringLiteral.
+ if isinstance(normalized[0], FTL.TextElement):
+ ws, text = extract_whitespace(re_leading_ws, normalized[0])
+ normalized[:1] = [ws, text]
+
+ # Extract explicit trailing whitespace into a StringLiteral.
+ if isinstance(normalized[-1], FTL.TextElement):
+ ws, text = extract_whitespace(re_trailing_ws, normalized[-1])
+ normalized[-1:] = [text, ws]
+
+ return FTL.Pattern([
+ element
+ for element in normalized
+ if element is not None
+ ])
+
+
+class Source(Transform):
+ """Base class for Transforms that get translations from source files.
+
+ The contract is that the first argument is the source path, and the
+ second is a key representing legacy string IDs, or Fluent id.attr.
+ """
+ def __init__(self, path, key):
+ self.path = path
+ self.key = key
+
+
+class FluentSource(Source):
+ """Declare a Fluent source translation to be copied over.
+
+ When evaluated, it clones the Pattern of the parsed source.
+ """
+ def __init__(self, path, key):
+ if not path.endswith('.ftl'):
+ raise NotSupportedError(
+ 'Please use COPY to migrate from legacy files '
+ '({})'.format(path)
+ )
+ if key[0] == '-' and '.' in key:
+ raise NotSupportedError(
+ 'Cannot migrate from Term Attributes, as they are'
+ 'locale-dependent ({})'.format(path)
+ )
+ super().__init__(path, key)
+
+ def __call__(self, ctx):
+ pattern = ctx.get_fluent_source_pattern(self.path, self.key)
+ return pattern.clone()
+
+
+class COPY_PATTERN(FluentSource):
+ """Create a Pattern with the translation value from the given source.
+
+ The given key can be a Message ID, Message ID.attribute_name, or
+ Term ID. Accessing Term attributes is not supported, as they're internal
+ to the localization.
+ """
+ pass
+
+
+class TransformPattern(FluentSource, Transformer):
+ """Base class for modifying a Fluent pattern as part of a migration.
+
+ Implement visit_* methods of the Transformer pattern to do the
+ actual modifications.
+ """
+ def __call__(self, ctx):
+ pattern = super().__call__(ctx)
+ return self.visit(pattern)
+
+ def visit_Pattern(self, node):
+ # Make sure we're creating valid Patterns after restructuring
+ # transforms.
+ node = self.generic_visit(node)
+ pattern = Transform.pattern_of(*node.elements)
+ return pattern
+
+ def visit_Placeable(self, node):
+ # Ensure we have a Placeable with an expression still.
+ # Transforms could have replaced the expression with
+ # a Pattern or PatternElement, in which case we
+ # just pass that through.
+ # Patterns then get flattened by visit_Pattern.
+ node = self.generic_visit(node)
+ if isinstance(node.expression, (FTL.Pattern, FTL.PatternElement)):
+ return node.expression
+ return node
+
+
+class LegacySource(Source):
+ """Declare the source translation to be migrated with other transforms.
+
+ When evaluated, `Source` returns a TextElement with the content from the
+ source translation. Escaped characters are unescaped by the
+ compare-locales parser according to the file format:
+
+ - in properties files: \\uXXXX,
+ - in DTD files: known named, decimal, and hexadecimal HTML entities.
+
+ Consult the following files for the list of known named HTML entities:
+
+ https://github.com/python/cpython/blob/2.7/Lib/htmlentitydefs.py
+ https://github.com/python/cpython/blob/3.6/Lib/html/entities.py
+
+ By default, leading and trailing whitespace on each line as well as
+ leading and trailing empty lines will be stripped from the source
+ translation's content. Set `trim=False` to disable this behavior.
+ """
+
+ def __init__(self, path, key, trim=None):
+ if path.endswith('.ftl'):
+ raise NotSupportedError(
+ 'Please use COPY_PATTERN to migrate from Fluent files '
+ '({})'.format(path))
+
+ super().__init__(path, key)
+ self.trim = trim
+
+ def get_text(self, ctx):
+ return ctx.get_legacy_source(self.path, self.key)
+
+ @staticmethod
+ def trim_text(text):
+ # strip leading white-space from each line
+ text = re.sub('^[ \t]+', '', text, flags=re.M)
+ # strip trailing white-space from each line
+ text = re.sub('[ \t]+$', '', text, flags=re.M)
+ # strip leading and trailing empty lines
+ text = text.strip('\r\n')
+ return text
+
+ def __call__(self, ctx):
+ text = self.get_text(ctx)
+ if self.trim is not False:
+ text = self.trim_text(text)
+ return FTL.TextElement(text)
+
+
+class COPY(LegacySource):
+ """Create a Pattern with the translation value from the given source."""
+
+ def __call__(self, ctx):
+ element = super().__call__(ctx)
+ return Transform.pattern_of(element)
+
+
+PRINTF = re.compile(
+ r'%(?P<good>%|'
+ r'(?:(?P<number>[1-9][0-9]*)\$)?'
+ r'(?P<width>\*|[0-9]+)?'
+ r'(?P<prec>\.(?:\*|[0-9]+)?)?'
+ r'(?P<spec>[duxXosScpfg]))'
+)
+
+
+def number():
+ i = 1
+ while True:
+ yield i
+ i += 1
+
+
+def normalize_printf(text):
+ """Normalize printf arguments so that they're all numbered.
+ Gecko forbids mixing unnumbered and numbered ones, so
+ we just need to convert unnumbered to numbered ones.
+ Also remove ones that have zero width, as they're intended
+ to be removed from the output by the localizer.
+ """
+ next_number = number()
+
+ def normalized(match):
+ if match.group('good') == '%':
+ return '%'
+ hidden = match.group('width') == '0'
+ if match.group('number'):
+ return '' if hidden else match.group()
+ num = next(next_number)
+ return '' if hidden else '%{}${}'.format(num, match.group('spec'))
+
+ return PRINTF.sub(normalized, text)
+
+
+class REPLACE_IN_TEXT(Transform):
+ """Create a Pattern from a TextElement and replace legacy placeables.
+
+ The original placeables are defined as keys on the `replacements` dict.
+ For each key the value must be defined as a FTL Pattern, Placeable,
+ TextElement or Expression to be interpolated.
+ """
+
+ def __init__(self, element, replacements, normalize_printf=False):
+ self.element = element
+ self.replacements = replacements
+ self.normalize_printf = normalize_printf
+
+ def __call__(self, ctx):
+ # For each specified replacement, find all indices of the original
+ # placeable in the source translation. If missing, the list of indices
+ # will be empty.
+ value = self.element.value
+ if self.normalize_printf:
+ value = normalize_printf(value)
+ key_indices = {
+ key: [m.start() for m in re.finditer(re.escape(key), value)]
+ for key in self.replacements.keys()
+ }
+
+ # Build a dict of indices to replacement keys.
+ keys_indexed = {}
+ for key, indices in key_indices.items():
+ for index in indices:
+ keys_indexed[index] = key
+
+ # Order the replacements by the position of the original placeable in
+ # the translation.
+ replacements = (
+ (key, ctx.evaluate(self.replacements[key]))
+ for index, key
+ in sorted(keys_indexed.items(), key=lambda x: x[0])
+ )
+
+ # A list of PatternElements built from the legacy translation and the
+ # FTL replacements. It may contain empty or adjacent TextElements.
+ elements = []
+ tail = value
+
+ # Convert original placeables and text into FTL Nodes. For each
+ # original placeable the translation will be partitioned around it and
+ # the text before it will be converted into an `FTL.TextElement` and
+ # the placeable will be replaced with its replacement.
+ for key, node in replacements:
+ before, key, tail = tail.partition(key)
+ elements.append(FTL.TextElement(before))
+ elements.append(node)
+
+ # Don't forget about the tail after the loop ends.
+ elements.append(FTL.TextElement(tail))
+ return Transform.pattern_of(*elements)
+
+
+class REPLACE(LegacySource):
+ """Create a Pattern with interpolations from given source.
+
+ Interpolations in the translation value from the given source will be
+ replaced with FTL placeables using the `REPLACE_IN_TEXT` transform.
+ """
+
+ def __init__(
+ self, path, key, replacements, **kwargs
+ ):
+ # We default normalize_printf to False except for .properties files.
+ # We still allow the caller to override the default value.
+ normalize_printf = False
+ if 'normalize_printf' in kwargs:
+ normalize_printf = kwargs['normalize_printf']
+ del kwargs['normalize_printf']
+ elif path.endswith('.properties'):
+ normalize_printf = True
+
+ super().__init__(path, key, **kwargs)
+ self.replacements = replacements
+ self.normalize_printf = normalize_printf
+
+ def __call__(self, ctx):
+ element = super().__call__(ctx)
+ return REPLACE_IN_TEXT(
+ element, self.replacements,
+ normalize_printf=self.normalize_printf
+ )(ctx)
+
+
+class PLURALS(LegacySource):
+ """Create a Pattern with plurals from given source.
+
+ Build an `FTL.SelectExpression` with the supplied `selector` and variants
+ extracted from the source. The original translation should be a
+ semicolon-separated list of plural forms. Each form will be converted
+ into a TextElement and run through the `foreach` function, which should
+ return an `FTL.Node` or a `Transform`. By default, the `foreach` function
+ creates a valid Pattern from the TextElement passed into it.
+ """
+ DEFAULT_ORDER = ('zero', 'one', 'two', 'few', 'many', 'other')
+
+ def __init__(self, path, key, selector, foreach=Transform.pattern_of,
+ **kwargs):
+ super().__init__(path, key, **kwargs)
+ self.selector = selector
+ self.foreach = foreach
+
+ def __call__(self, ctx):
+ element = super().__call__(ctx)
+ selector = ctx.evaluate(self.selector)
+ keys = ctx.plural_categories
+ forms = [
+ FTL.TextElement(part.strip())
+ for part in element.value.split(';')
+ ]
+
+ # The default CLDR form should be the last we have in DEFAULT_ORDER,
+ # usually `other`, but in some cases `many`. If we don't have a variant
+ # for that, we'll append one, using the, in CLDR order, last existing
+ # variant in the legacy translation. That may or may not be the last
+ # variant.
+ default_key = [
+ key for key in reversed(self.DEFAULT_ORDER) if key in keys
+ ][0]
+
+ # Match keys to legacy forms in the order they are defined in Gecko's
+ # PluralForm.jsm. Filter out empty forms.
+ pairs = [
+ (key, var)
+ for key, var in zip(keys, forms)
+ if var.value
+ ]
+
+ # A special case for legacy translations which don't define any
+ # plural forms.
+ if len(pairs) == 0:
+ return Transform.pattern_of()
+
+ # A special case for languages with one plural category or one legacy
+ # variant. We don't need to insert a SelectExpression for them.
+ if len(pairs) == 1:
+ _, only_form = pairs[0]
+ only_variant = ctx.evaluate(self.foreach(only_form))
+ return Transform.pattern_of(only_variant)
+
+ # Make sure the default key is defined. If it's missing, use the last
+ # form (in CLDR order) found in the legacy translation.
+ pairs.sort(key=lambda pair: self.DEFAULT_ORDER.index(pair[0]))
+ last_key, last_form = pairs[-1]
+ if last_key != default_key:
+ pairs.append((default_key, last_form))
+
+ def createVariant(key, form):
+ # Run the legacy plural form through `foreach` which returns an
+ # `FTL.Node` describing the transformation required for each
+ # variant. Then evaluate it to a migrated FTL node.
+ value = ctx.evaluate(self.foreach(form))
+ return FTL.Variant(
+ key=FTL.Identifier(key),
+ value=value,
+ default=key == default_key
+ )
+
+ select = FTL.SelectExpression(
+ selector=selector,
+ variants=[
+ createVariant(key, form)
+ for key, form in pairs
+ ]
+ )
+
+ return Transform.pattern_of(select)
+
+
+class CONCAT(Transform):
+ """Create a new Pattern from Patterns, PatternElements and Expressions.
+
+ When called with at least two elements, `CONCAT` disables the trimming
+ behavior of the elements which are subclasses of `LegacySource` by
+ setting `trim=False`, unless `trim` has already been set explicitly. The
+ following two `CONCAT` calls are equivalent:
+
+ CONCAT(
+ FTL.TextElement("Hello"),
+ COPY("file.properties", "hello")
+ )
+
+ CONCAT(
+ FTL.TextElement("Hello"),
+ COPY("file.properties", "hello", trim=False)
+ )
+
+ Set `trim=True` explicitly to force trimming:
+
+ CONCAT(
+ FTL.TextElement("Hello "),
+ COPY("file.properties", "hello", trim=True)
+ )
+
+ When called with a single element and when the element is a subclass of
+ `LegacySource`, the trimming behavior is not changed. The following two
+ transforms are equivalent:
+
+ CONCAT(COPY("file.properties", "hello"))
+
+ COPY("file.properties", "hello")
+ """
+
+ def __init__(self, *elements, **kwargs):
+ # We want to support both passing elements as *elements in the
+ # migration specs and as elements=[]. The latter is used by
+ # FTL.BaseNode.traverse when it recreates the traversed node using its
+ # attributes as kwargs.
+ self.elements = list(kwargs.get('elements', elements))
+
+ # We want to make CONCAT(COPY()) equivalent to COPY() so that it's
+ # always safe (no-op) to wrap transforms in a CONCAT. This is used by
+ # the implementation of transforms_from.
+ if len(self.elements) > 1:
+ for elem in self.elements:
+ # Only change trim if it hasn't been set explicitly.
+ if isinstance(elem, LegacySource) and elem.trim is None:
+ elem.trim = False
+
+ def __call__(self, ctx):
+ return Transform.pattern_of(*self.elements)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/util.py b/third_party/python/fluent.migrate/fluent/migrate/util.py
new file mode 100644
index 0000000000..3e4d725d3e
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/util.py
@@ -0,0 +1,110 @@
+import textwrap
+
+import fluent.syntax.ast as FTL
+from fluent.syntax.parser import FluentParser, FluentParserStream
+
+
+fluent_parser = FluentParser(with_spans=False)
+
+
+def parse(Parser, string):
+ if Parser is FluentParser:
+ return fluent_parser.parse(string)
+
+ # Parsing a legacy resource.
+
+ # Parse the string into the internal Context.
+ parser = Parser()
+ # compare-locales expects ASCII strings.
+ parser.readContents(string.encode('utf8'))
+ # Transform the parsed result which is an iterator into a dict.
+ return {ent.key: ent for ent in parser}
+
+
+def ftl_resource_to_ast(code):
+ return fluent_parser.parse(ftl(code))
+
+
+def ftl_resource_to_json(code):
+ return fluent_parser.parse(ftl(code)).to_json()
+
+
+def ftl_pattern_to_json(code):
+ ps = FluentParserStream(ftl(code))
+ return fluent_parser.maybe_get_pattern(ps).to_json()
+
+
+def to_json(merged_iter):
+ return {
+ path: resource.to_json()
+ for path, resource in merged_iter
+ }
+
+
+LOCALIZABLE_ENTRIES = (FTL.Message, FTL.Term)
+
+
+def get_message(body, ident):
+ """Get message called `ident` from the `body` iterable."""
+ for entity in body:
+ if isinstance(entity, LOCALIZABLE_ENTRIES) and entity.id.name == ident:
+ return entity
+
+
+def get_transform(body, ident):
+ """Get entity called `ident` from the `body` iterable."""
+ for transform in body:
+ if transform.id.name == ident:
+ return transform
+
+
+def skeleton(node):
+ """Create a skeleton copy of the given node.
+
+ For localizable entries, the value is None and the attributes are {}.
+ That's not a valid Fluent entry, so it requires further manipulation to
+ set values and/or attributes.
+ """
+ if isinstance(node, LOCALIZABLE_ENTRIES):
+ return type(node)(id=node.id.clone(), value=None)
+ return node.clone()
+
+
+def ftl(code):
+ """Nicer indentation for FTL code.
+
+ The code returned by this function is meant to be compared against the
+ output of the FTL Serializer. The input code will end with a newline to
+ match the output of the serializer.
+ """
+
+ # The code might be triple-quoted.
+ code = code.lstrip('\n')
+
+ return textwrap.dedent(code)
+
+
+def fold(fun, node, init):
+ """Reduce `node` to a single value using `fun`.
+
+ Apply `fun` against an accumulator and each subnode of `node` (in postorder
+ traversal) to reduce it to a single value.
+ """
+
+ def fold_(vals, acc):
+ if not vals:
+ return acc
+
+ head = list(vals)[0]
+ tail = list(vals)[1:]
+
+ if isinstance(head, FTL.BaseNode):
+ acc = fold(fun, head, acc)
+ if isinstance(head, list):
+ acc = fold_(head, acc)
+ if isinstance(head, dict):
+ acc = fold_(head.values(), acc)
+
+ return fold_(tail, fun(acc, head))
+
+ return fold_(vars(node).values(), init)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/validator.py b/third_party/python/fluent.migrate/fluent/migrate/validator.py
new file mode 100644
index 0000000000..191c4e15e8
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/validator.py
@@ -0,0 +1,335 @@
+import argparse
+import ast
+from itertools import zip_longest
+
+from fluent.migrate import transforms
+from fluent.migrate.errors import MigrationError
+from fluent.migrate.helpers import transforms_from
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Visitor
+from compare_locales import mozpath
+
+
+class MigrateNotFoundException(Exception):
+ pass
+
+
+class BadContextAPIException(Exception):
+ pass
+
+
+def process_assign(node, context):
+ if isinstance(node.value, ast.Str):
+ val = node.value.s
+ elif isinstance(node.value, ast.Name):
+ val = context.get(node.value.id)
+ elif isinstance(node.value, ast.Call):
+ val = node.value
+ if val is None:
+ return
+ for target in node.targets:
+ if isinstance(target, ast.Name):
+ context[target.id] = val
+
+
+class Validator:
+ """Validate a migration recipe
+
+ Extract information from the migration recipe about which files to
+ migrate from, and which files to migrate to.
+ Also check for errors in the recipe, or bad API usage.
+ """
+
+ @classmethod
+ def validate(cls, path, code=None):
+ if code is None:
+ with open(path) as fh:
+ code = fh.read()
+ validator = cls(code, path)
+ return validator.inspect()
+
+ def __init__(self, code, path):
+ self.ast = ast.parse(code, path)
+
+ def inspect(self):
+ migrate_func = None
+ global_assigns = {}
+ for top_level in ast.iter_child_nodes(self.ast):
+ if (
+ isinstance(top_level, ast.FunctionDef)
+ and top_level.name == 'migrate'
+ ):
+ if migrate_func:
+ raise MigrateNotFoundException(
+ 'Duplicate definition of migrate'
+ )
+ migrate_func = top_level
+ details = self.inspect_migrate(migrate_func, global_assigns)
+ if isinstance(top_level, ast.Assign):
+ process_assign(top_level, global_assigns)
+ if isinstance(top_level, (ast.Import, ast.ImportFrom)):
+ if 'module' in top_level._fields:
+ module = top_level.module
+ else:
+ module = None
+ for alias in top_level.names:
+ asname = alias.asname or alias.name
+ dotted = alias.name
+ if module:
+ dotted = f'{module}.{dotted}'
+ global_assigns[asname] = dotted
+ if not migrate_func:
+ raise MigrateNotFoundException(
+ 'migrate function not found'
+ )
+ return details
+
+ def inspect_migrate(self, migrate_func, global_assigns):
+ if (
+ len(migrate_func.args.args) != 1 or
+ any(
+ getattr(migrate_func.args, arg_field)
+ for arg_field in migrate_func.args._fields
+ if arg_field != 'args'
+ )
+ ):
+ raise MigrateNotFoundException(
+ 'migrate takes only one positional argument'
+ )
+ arg = migrate_func.args.args[0]
+ if isinstance(arg, ast.Name):
+ ctx_var = arg.id # python 2
+ else:
+ ctx_var = arg.arg # python 3
+ visitor = MigrateAnalyzer(ctx_var, global_assigns)
+ visitor.visit(migrate_func)
+ return {
+ 'references': visitor.references,
+ 'issues': visitor.issues,
+ }
+
+
+def full_name(node, global_assigns):
+ leafs = []
+ while isinstance(node, ast.Attribute):
+ leafs.append(node.attr)
+ node = node.value
+ if isinstance(node, ast.Name):
+ leafs.append(global_assigns.get(node.id, node.id))
+ return '.'.join(reversed(leafs))
+
+
+PATH_TYPES = (str,) + (ast.Call,)
+
+
+class MigrateAnalyzer(ast.NodeVisitor):
+ def __init__(self, ctx_var, global_assigns):
+ super().__init__()
+ self.ctx_var = ctx_var
+ self.global_assigns = global_assigns
+ self.depth = 0
+ self.issues = []
+ self.references = set()
+
+ def generic_visit(self, node):
+ self.depth += 1
+ super().generic_visit(node)
+ self.depth -= 1
+
+ def visit_Assign(self, node):
+ if self.depth == 1:
+ process_assign(node, self.global_assigns)
+ self.generic_visit(node)
+
+ def visit_Attribute(self, node):
+ if isinstance(node.value, ast.Name) and node.value.id == self.ctx_var:
+ if node.attr not in (
+ 'add_transforms',
+ 'locale',
+ ):
+ raise BadContextAPIException(
+ 'Unexpected attribute access on {}.{}'.format(
+ self.ctx_var, node.attr
+ )
+ )
+ self.generic_visit(node)
+
+ def visit_Call(self, node):
+ if (
+ isinstance(node.func, ast.Attribute) and
+ isinstance(node.func.value, ast.Name) and
+ node.func.value.id == self.ctx_var
+ ):
+ return self.call_ctx(node)
+ dotted = full_name(node.func, self.global_assigns)
+ if dotted == 'fluent.migrate.helpers.transforms_from':
+ return self.call_helpers_transforms_from(node)
+ if dotted.startswith('fluent.migrate.'):
+ return self.call_transform(node, dotted)
+ self.generic_visit(node)
+
+ def call_ctx(self, node):
+ if node.func.attr == 'add_transforms':
+ return self.call_add_transforms(node)
+ raise BadContextAPIException(
+ 'Unexpected call on {}.{}'.format(
+ self.ctx_var, node.func.attr
+ )
+ )
+
+ def call_add_transforms(self, node):
+ args_msg = (
+ 'Expected arguments to {}.add_transforms: '
+ 'target_ftl_path, reference_ftl_path, list_of_transforms'
+ ).format(self.ctx_var)
+ ref_msg = (
+ 'Expected second argument to {}.add_transforms: '
+ 'reference should be string or variable with string value'
+ ).format(self.ctx_var)
+ # Just check call signature here, check actual types below
+ if not self.check_arguments(node, (ast.AST, ast.AST, ast.AST)):
+ self.issues.append({
+ 'msg': args_msg,
+ 'line': node.lineno,
+ })
+ return
+ in_reference = node.args[1]
+ if isinstance(in_reference, ast.Name):
+ in_reference = self.global_assigns.get(in_reference.id)
+ if isinstance(in_reference, ast.Str):
+ in_reference = in_reference.s
+ if not isinstance(in_reference, str):
+ self.issues.append({
+ 'msg': ref_msg,
+ 'line': node.args[1].lineno,
+ })
+ return
+ self.references.add(in_reference)
+ # Checked node.args[1].
+ # There's not a lot we can say about our target path,
+ # ignoring that.
+ # For our transforms, we want more checks.
+ self.generic_visit(node.args[2])
+
+ def call_transform(self, node, dotted):
+ module, called = dotted.rsplit('.', 1)
+ if module not in ('fluent.migrate', 'fluent.migrate.transforms'):
+ return
+ transform = getattr(transforms, called)
+ if not issubclass(transform, transforms.Source):
+ return
+ bad_args = f'{called} takes path and key as first two params'
+ if not self.check_arguments(
+ node, ((ast.Str, ast.Name), (ast.Str, ast.Name),),
+ allow_more=True, check_kwargs=False
+ ):
+ self.issues.append({
+ 'msg': bad_args,
+ 'line': node.lineno
+ })
+ return
+ path = node.args[0]
+ if isinstance(path, ast.Str):
+ path = path.s
+ if isinstance(path, ast.Name):
+ path = self.global_assigns.get(path.id)
+ if not isinstance(path, PATH_TYPES):
+ self.issues.append({
+ 'msg': bad_args,
+ 'line': node.lineno
+ })
+
+ def call_helpers_transforms_from(self, node):
+ args_msg = (
+ 'Expected arguments to transforms_from: '
+ 'str, **substitions'
+ )
+ if not self.check_arguments(
+ node, (ast.Str,), check_kwargs=False
+ ):
+ self.issues.append({
+ 'msg': args_msg,
+ 'line': node.lineno,
+ })
+ return
+ kwargs = {}
+ found_bad_keywords = False
+ for keyword in node.keywords:
+ v = keyword.value
+ if isinstance(v, ast.Str):
+ v = v.s
+ if isinstance(v, ast.Name):
+ v = self.global_assigns.get(v.id)
+ if isinstance(v, ast.Call):
+ v = 'determined at runtime'
+ if not isinstance(v, PATH_TYPES):
+ msg = 'Bad keyword arg {} to transforms_from'.format(
+ keyword.arg
+ )
+ self.issues.append({
+ 'msg': msg,
+ 'line': node.lineno,
+ })
+ found_bad_keywords = True
+ else:
+ kwargs[keyword.arg] = v
+ if found_bad_keywords:
+ return
+ try:
+ transforms = transforms_from(node.args[0].s, **kwargs)
+ except MigrationError as e:
+ self.issues.append({
+ 'msg': str(e),
+ 'line': node.lineno,
+ })
+ return
+ ti = TransformsInspector()
+ ti.visit(transforms)
+ self.issues.extend({
+ 'msg': issue,
+ 'line': node.lineno,
+ } for issue in set(ti.issues))
+
+ def check_arguments(
+ self, node, argspec, check_kwargs=True, allow_more=False
+ ):
+ if check_kwargs and (
+ node.keywords or
+ (hasattr(node, 'kwargs') and node.kwargs)
+ ):
+ return False
+ if hasattr(node, 'starargs') and node.starargs:
+ return False
+ for arg, NODE_TYPE in zip_longest(node.args, argspec):
+ if NODE_TYPE is None:
+ return True if allow_more else False
+ if not (isinstance(arg, NODE_TYPE)):
+ return False
+ return True
+
+
+class TransformsInspector(Visitor):
+ def __init__(self):
+ super().__init__()
+ self.issues = []
+
+ def generic_visit(self, node):
+ if isinstance(node, transforms.Source):
+ src = node.path
+ # Source needs paths to be normalized
+ # https://bugzilla.mozilla.org/show_bug.cgi?id=1568199
+ if src != mozpath.normpath(src):
+ self.issues.append(
+ f'Source "{src}" needs to be a normalized path'
+ )
+ super().generic_visit(node)
+
+
+def cli():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('migration')
+ args = parser.parse_args()
+ issues = Validator.validate(args.migration)['issues']
+ for issue in issues:
+ print(issue['msg'], 'at line', issue['line'])
+ return 1 if issues else 0