diff options
Diffstat (limited to 'comm/third_party/python/fluent.migratetb/fluent')
14 files changed, 2155 insertions, 0 deletions
diff --git a/comm/third_party/python/fluent.migratetb/fluent/__init__.py b/comm/third_party/python/fluent.migratetb/fluent/__init__.py new file mode 100644 index 0000000000..69e3be50da --- /dev/null +++ b/comm/third_party/python/fluent.migratetb/fluent/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/comm/third_party/python/fluent.migratetb/fluent/migratetb/__init__.py b/comm/third_party/python/fluent.migratetb/fluent/migratetb/__init__.py new file mode 100644 index 0000000000..fead341500 --- /dev/null +++ b/comm/third_party/python/fluent.migratetb/fluent/migratetb/__init__.py @@ -0,0 +1,5 @@ +# coding=utf8 + +from .transforms import ( # noqa: F401 + CONCAT, COPY, COPY_PATTERN, PLURALS, REPLACE, REPLACE_IN_TEXT +) diff --git a/comm/third_party/python/fluent.migratetb/fluent/migratetb/_context.py b/comm/third_party/python/fluent.migratetb/fluent/migratetb/_context.py new file mode 100644 index 0000000000..53a771d58a --- /dev/null +++ b/comm/third_party/python/fluent.migratetb/fluent/migratetb/_context.py @@ -0,0 +1,333 @@ +# coding=utf8 +from __future__ import unicode_literals +from __future__ import absolute_import + +import os +import codecs +from functools import partial +import logging +from six.moves import zip_longest + +import fluent.syntax.ast as FTL +from fluent.syntax.parser import FluentParser +from fluent.syntax.serializer import FluentSerializer +from compare_locales.parser import getParser +from compare_locales.plurals import get_plural + +from .evaluator import Evaluator +from .merge import merge_resource +from .errors import ( + UnreadableReferenceError, +) + + +class InternalContext(object): + """Internal context for merging translation resources. + + For the public interface, see `context.MigrationContext`. 
def __init__(
    self, lang, reference_dir, localization_dir, enforce_translated=False
):
    """Set up parsers, plural categories and empty resource stores.

    `lang` is handed to compare-locales' `get_plural` to look up the
    plural categories of the target language.

    `reference_dir` and `localization_dir` are the directories onto which
    `read_reference_ftl`, `read_localization_ftl` and
    `maybe_add_localization` join resource paths.

    `enforce_translated` is consulted by `read_legacy_resource` when it
    filters the parsed legacy entities.
    """
    self.fluent_parser = FluentParser(with_spans=False)
    self.fluent_serializer = FluentSerializer()

    # Keep the input directories on the instance. The read_* methods of
    # this class use them, so they must exist even when the class is used
    # without a subclass assigning them afterwards.
    self.reference_dir = reference_dir
    self.localization_dir = localization_dir

    # An iterable of plural category names relevant to the context's
    # language. E.g. ('one', 'other') for English.
    self.plural_categories = get_plural(lang)
    if self.plural_categories is None:
        logger = logging.getLogger('migrate')
        logger.warning(
            'Plural rule for "{}" is not defined in '
            'compare-locales'.format(lang))
        self.plural_categories = ('one', 'other')

    self.enforce_translated = enforce_translated
    # Parsed input resources stored by resource path.
    self.reference_resources = {}
    self.localization_resources = {}
    self.target_resources = {}

    # A dict mapping a target resource path to the list of transforms
    # (`FTL.Message`-like nodes) registered for that path.
    self.transforms = {}

    # A dict mapping `(path, ident)` of a target message to the set of
    # `(path, key)` legacy sources it depends on. `in_changeset` looks
    # messages up here, so start with an empty mapping.
    self.dependencies = {}

    # The evaluator instance is an AST transformer capable of walking an
    # AST hierarchy and evaluating nodes which are migration Transforms.
    self.evaluator = Evaluator(self)
def read_localization_ftl(self, path):
    """Parse the existing localization FTL file at `path`.

    `path` is relative to `self.localization_dir`. When the file cannot
    be opened (IOError) or decoded (UnicodeDecodeError), the problem is
    logged on the 'migrate' logger and a fresh, empty `FTL.Resource` is
    returned instead.
    """
    migrate_log = logging.getLogger('migrate')
    target_file = os.path.join(self.localization_dir, path)
    try:
        parsed = self.read_ftl_resource(target_file)
    except IOError:
        # A missing file is expected for new migrations; just note it.
        migrate_log.info(
            'Localization file {} does not exist and '
            'it will be created'.format(path))
    except UnicodeDecodeError:
        # The existing content is unusable; warn about the data loss.
        migrate_log.warning(
            'Localization file {} has broken encoding. '
            'It will be re-created and some translations '
            'may be lost'.format(path))
    else:
        return parsed
    return FTL.Resource()
def get_fluent_source_pattern(self, path, key):
    """Look up a pattern in an already loaded localized Fluent resource.

    `key` is either `message-id` or `message-id.attribute`. The first
    Message or Term in the resource whose id matches is used. Returns
    its value, or the value of the named attribute, or None when the
    entry or the attribute does not exist.
    Used by the `COPY_PATTERN` transform.
    """
    entries = self.localization_resources[path].body
    entry_name, _, attr_name = key.partition('.')

    # Only Messages and Terms carry ids; the first match wins.
    match = next(
        (
            item for item in entries
            if isinstance(item, (FTL.Message, FTL.Term))
            and item.id.name == entry_name
        ),
        None,
    )
    if match is None:
        return None
    if attr_name:
        return next(
            (
                attr.value for attr in match.attributes
                if attr.id.name == attr_name
            ),
            None,
        )
    return match.value
+ return message.id.name + + messages1 = sorted( + (entry for entry in res1.body + if isinstance(entry, FTL.Message) + or isinstance(entry, FTL.Term)), + key=message_id) + messages2 = sorted( + (entry for entry in res2.body + if isinstance(entry, FTL.Message) + or isinstance(entry, FTL.Term)), + key=message_id) + for msg1, msg2 in zip_longest(messages1, messages2): + if msg1 is None or msg2 is None: + return False + if not msg1.equals(msg2): + return False + return True + + def merge_changeset(self, changeset=None, known_translations=None): + """Return a generator of FTL ASTs for the changeset. + + The input data must be configured earlier using the `add_*` methods. + if given, `changeset` must be a set of (path, key) tuples describing + which legacy translations are to be merged. If `changeset` is None, + all legacy translations will be allowed to be migrated in a single + changeset. + + We use the `in_changeset` method to determine if a message should be + migrated for the given changeset. + + Given `changeset`, return a dict whose keys are resource paths and + values are `FTL.Resource` instances. The values will also be used to + update this context's existing localization resources. + """ + + if changeset is None: + # Merge all known legacy translations. Used in tests. + changeset = { + (path, key) + for path, strings in self.localization_resources.items() + if not path.endswith('.ftl') + for key in strings.keys() + } + + if known_translations is None: + known_translations = changeset + + for path, reference in self.reference_resources.items(): + current = self.target_resources[path] + transforms = self.transforms.get(path, []) + in_changeset = partial( + self.in_changeset, changeset, known_translations, path) + + # Merge legacy translations with the existing ones using the + # reference as a template. 
+ snapshot = merge_resource( + self, reference, current, transforms, in_changeset + ) + + # Skip this path if the messages in the merged snapshot are + # identical to those in the current state of the localization file. + # This may happen when: + # + # - none of the transforms is in the changset, or + # - all messages which would be migrated by the context's + # transforms already exist in the current state. + if self.messages_equal(current, snapshot): + continue + + # Store the merged snapshot on the context so that the next merge + # already takes it into account as the existing localization. + self.target_resources[path] = snapshot + + # The result for this path is a complete `FTL.Resource`. + yield path, snapshot + + def in_changeset(self, changeset, known_translations, path, ident): + """Check if a message should be migrated in this changeset. + + The message is identified by path and ident. + + + A message will be migrated only if all of its dependencies + are present in the currently processed changeset. + + If a transform defined for this message points to a missing + legacy translation, this message will not be merged. The + missing legacy dependency won't be present in the changeset. + + This also means that partially translated messages (e.g. + constructed from two legacy strings out of which only one is + avaiable) will never be migrated. + """ + message_deps = self.dependencies.get((path, ident), None) + + # Don't merge if we don't have a transform for this message. + if message_deps is None: + return False + + # As a special case, if a transform exists but has no + # dependecies, it's a hardcoded `FTL.Node` which doesn't + # migrate any existing translation but rather creates a new + # one. Merge it. + if len(message_deps) == 0: + return True + + # Make sure all the dependencies are present in the current + # changeset. Partial migrations are not currently supported. 
def file_path_relative(self, file_path):
    """Return `file_path` with the configured working directory removed.

    When no working directory was given (`self._cwd` is None), or when
    `file_path` does not live below it, the path is returned unchanged.

    The prefix is only stripped when it is followed by a path separator,
    so a working directory of `de` does not match `de-AT/...`. Trailing
    separators on the working directory are ignored, so `de` and `de/`
    behave the same.
    """
    if self._cwd is None:
        return file_path

    prefix = '{}'.format(self._cwd).rstrip('/\\')
    if not prefix:
        # Nothing meaningful to strip.
        return file_path

    remainder = file_path[len(prefix):]
    # Require a separator right after the prefix so that only whole path
    # components are matched.
    if file_path.startswith(prefix) and remainder[:1] in ('/', '\\'):
        return remainder[1:]
    return file_path
def handleFile(self, file_blame):
    """Record who most recently touched each entity of one annotated file.

    `file_blame` is one entry of the parsed annotate output; this method
    reads its 'path' and 'lines' items. For every parsed entity (and, for
    Fluent entities, every attribute) the annotated line with the largest
    date inside the value's line range is taken, and
    `self.blame[path][key]` is set to `[index into self.users, timestamp]`.
    """
    path = mozpath.normsep(self.file_path_relative(file_blame['path']))

    try:
        parser = getParser(path)
    except UserWarning:
        # presumably compare-locales has no parser for this file type;
        # such files are skipped without an entry in self.blame.
        return

    file_attribution = self.blame[path] = {}
    self.readFile(parser, path)

    for entity in parser.parse():
        if isinstance(entity, Junk):
            continue

        spans = [(entity.key, entity.val_span)] if entity.val_span else []
        if isinstance(entity, FluentEntity):
            spans.extend(
                ('{}.{}'.format(entity.key, attr.key), attr.val_span)
                for attr in entity.attributes
            )

        for key, (val_start, val_end) in spans:
            annotated = file_blame['lines']
            first_line = entity.ctx.linecol(val_start)[0]
            last_line = entity.ctx.linecol(val_end)[0]
            # Newest line first; only the timestamp is compared, the
            # timezone part of 'date' is ignored.
            newest_first = sorted(
                annotated[first_line - 1:last_line],
                key=lambda line: line['date'][0],
                reverse=True,
            )
            newest = newest_first[0]
            author = newest['user']
            if author not in self.users:
                self.users.append(author)
            file_attribution[key] = [
                self.users.index(author),
                newest['date'][0],
            ]
def by_first_commit(item):
    """Sort key: the changeset's first commit date."""
    return item['first_commit']


def convert_blame_to_changesets(blame_json):
    """Convert a blame dict into a list of changesets.

    The blame information in `blame_json` should be a dict of the following
    structure:

        {
            'authors': [
                'A.N. Author <author@example.com>',
            ],
            'blame': {
                'path/one': {
                    'key1': [0, 1346095921.0],
                },
            }
        }

    It will be transformed into a list of changesets which can be fed into
    `InternalContext.serialize_changeset`:

        [
            {
                'author': 'A.N. Author <author@example.com>',
                'first_commit': 1346095921.0,
                'changes': {
                    ('path/one', 'key1'),
                }
            },
        ]

    One changeset is produced per author, ordered by `first_commit`.
    `first_commit` is the smallest timestamp among the author's changes.
    An author without any changes gets the current time instead.
    """
    now = time.time()
    changesets = [
        {
            'author': author,
            # None means "no change seen yet". Using the current time as
            # the initial minimum would ignore timestamps that lie ahead
            # of the local clock.
            'first_commit': None,
            'changes': set()
        } for author in blame_json['authors']
    ]

    for path, keys_info in blame_json['blame'].items():
        for key, (author_index, timestamp) in keys_info.items():
            changeset = changesets[author_index]
            changeset['changes'].add((path, key))
            earliest = changeset['first_commit']
            if earliest is None or timestamp < earliest:
                changeset['first_commit'] = timestamp

    for changeset in changesets:
        if changeset['first_commit'] is None:
            changeset['first_commit'] = now

    return sorted(changesets, key=by_first_commit)
UnreadableReferenceError, +) +from ._context import InternalContext + + +__all__ = [ + 'EmptyLocalizationError', + 'UnreadableReferenceError', + 'MigrationContext', +] + + +class MigrationContext(InternalContext): + """Stateful context for merging translation resources. + + `MigrationContext` must be configured with the target locale and the + directory locations of the input data. + + The transformation takes four types of input data: + + - The en-US FTL reference files which will be used as templates for + message order, comments and sections. If the reference_dir is None, + the migration will create Messages and Terms in the order given by + the transforms. + + - The current FTL files for the given locale. + + - A list of `FTL.Message` or `FTL.Term` objects some of whose nodes + are special helper or transform nodes: + + helpers: VARIABLE_REFERENCE, MESSAGE_REFERENCE, TERM_REFERENCE + transforms: COPY, REPLACE_IN_TEXT, REPLACE, PLURALS, CONCAT + fluent value helper: COPY_PATTERN + + The legacy (DTD, properties) translation files are deduced by the + dependencies in the transforms. The translations from these files will be + read from the localization_dir and transformed into FTL and merged + into the existing FTL files for the given language. + """ + + def __init__( + self, locale, reference_dir, localization_dir, enforce_translated=False + ): + super(MigrationContext, self).__init__( + locale, reference_dir, localization_dir, + enforce_translated=enforce_translated + ) + self.locale = locale + # Paths to directories with input data, relative to CWD. + self.reference_dir = reference_dir + self.localization_dir = localization_dir + + # A dict whose keys are `(path, key)` tuples corresponding to target + # FTL translations, and values are sets of `(path, key)` tuples + # corresponding to localized entities which will be migrated. 
def add_transforms(self, target, reference, transforms):
    """Define transforms for target using reference as template.

    `target` is a path of the destination FTL file relative to the
    localization directory. `reference` is a path to the template FTL
    file relative to the reference directory.

    Each transform is an extended FTL node with `Transform` nodes as some
    values. Transforms are stored in their lazy AST form until
    `merge_changeset` is called, at which point they are evaluated to real
    FTL nodes with migrated translations.

    Each transform is scanned for `Source` nodes which will be used to
    build the list of dependencies for the transformed message.

    For transforms that merely copy legacy messages or Fluent patterns,
    using `fluent.migratetb.helpers.transforms_from` is recommended.

    Raises `EmptyLocalizationError` when the transforms depend on legacy
    resources but none of them could be loaded. `read_reference_ftl` may
    raise `UnreadableReferenceError` for an unreadable reference file.
    """
    def get_sources(acc, cur):
        if isinstance(cur, Source):
            acc.add((cur.path, cur.key))
        return acc

    if self.reference_dir is None:
        # Add skeletons to resource body for each transform
        # if there's no reference.
        reference_ast = self.reference_resources.get(target)
        if reference_ast is None:
            reference_ast = FTL.Resource()
        reference_ast.body.extend(
            skeleton(transform) for transform in transforms
        )
    else:
        reference_ast = self.read_reference_ftl(reference)
    self.reference_resources[target] = reference_ast

    for node in transforms:
        ident = node.id.name
        # Scan `node` for `Source` nodes and collect the information they
        # store into a set of dependencies.
        dependencies = fold(get_sources, node, set())
        # Set these sources as dependencies for the current transform.
        self.dependencies[(target, ident)] = dependencies

        # The target Fluent message should exist in the reference file. If
        # it doesn't, it's probably a typo.
        # Of course, only if we're having a reference.
        if self.reference_dir is None:
            continue
        if get_message(reference_ast.body, ident) is None:
            logger = logging.getLogger('migrate')
            logger.warning(
                '{} "{}" was not found in {}'.format(
                    type(node).__name__, ident, reference))

    # Keep track of localization resource paths which were defined as
    # sources in the transforms. Collect them into a set first so that
    # every file is read and parsed once per call, not once per message
    # that depends on it.
    expected_paths = {
        path
        for dependencies in self.dependencies.values()
        for path, _ in dependencies
    }

    # Read all legacy translation files defined in Source transforms. This
    # may fail but a single missing legacy resource doesn't mean that the
    # migration can't succeed. Sorting keeps the log output stable.
    for path in sorted(expected_paths):
        self.maybe_add_localization(path)

    # However, if all legacy resources are missing, bail out early. There
    # are no translations to migrate. We'd also get errors in hg annotate.
    if expected_paths and not self.localization_resources:
        error_message = 'No localization files were found'
        logging.getLogger('migrate').error(error_message)
        raise EmptyLocalizationError(error_message)

    # Add the current transforms to any other transforms added earlier for
    # this path.
    path_transforms = self.transforms.setdefault(target, [])
    path_transforms += transforms

    if target not in self.target_resources:
        target_ast = self.read_localization_ftl(target)
        self.target_resources[target] = target_ast
def VARIABLE_REFERENCE(name):
    """Create a VariableReference expression.

    `name` is wrapped in an `FTL.Identifier` and used as the id of the
    returned `FTL.VariableReference` node.
    """

    return FTL.VariableReference(
        id=FTL.Identifier(name)
    )
def visit_FunctionReference(self, node):
    """Turn COPY / COPY_PATTERN / REPLACE calls into migration transforms.

    Function names listed in IMPLICIT_TRANSFORMS or FORBIDDEN_TRANSFORMS
    raise NotSupportedError. Any other function reference is visited
    generically.
    """
    func_name = node.id.name

    if func_name in self.IMPLICIT_TRANSFORMS:
        raise NotSupportedError(
            "{} may not be used with transforms_from(). It runs "
            "implicitly on all Patterns anyways.".format(func_name))
    if func_name in self.FORBIDDEN_TRANSFORMS:
        raise NotSupportedError(
            "{} may not be used with transforms_from(). It requires "
            "additional logic in Python code.".format(func_name))
    if func_name not in ('COPY', 'COPY_PATTERN', 'REPLACE'):
        return self.generic_visit(node)

    # Positional arguments stay a lazy generator: they are converted only
    # when unpacked into the call below, i.e. after the named ones.
    positional = (
        self.into_argument(arg) for arg in node.arguments.positional
    )
    named = {
        arg.name.name: self.into_argument(arg.value)
        for arg in node.arguments.named
    }
    if func_name == 'COPY':
        factory = COPY
    elif func_name == 'REPLACE':
        factory = REPLACE
    else:
        factory = COPY_PATTERN
    return factory(*positional, **named)
def into_argument(self, node):
    """Convert an AST node into an argument for a migration transform.

    String literals become their value, with "True" and "False" mapped
    to the Python booleans. Message references are looked up by name in
    `self.substitutions`. Anything else, or an unknown substitution name,
    raises InvalidTransformError.
    """
    if isinstance(node, FTL.StringLiteral):
        literal = node.value
        # Fluent has no boolean literals, so these two strings stand in.
        return {"True": True, "False": False}.get(literal, literal)

    if not isinstance(node, FTL.MessageReference):
        raise InvalidTransformError(
            "Invalid argument passed to COPY: {}".format(
                type(node).__name__))

    try:
        return self.substitutions[node.id.name]
    except KeyError:
        raise InvalidTransformError(
            "Unknown substitution in COPY: {}".format(
                node.id.name))
+ + <!ENTITY update.noUpdatesFound "&brandShortName; is up to date"> + + First define a dictionary with the replacements outside of the migrate + function like (must be wrapped in a dict() function call): + + about_replacements = dict({ + "&brandShortName;": TERM_REFERENCE("brand-short-name"), + }) + + Note: In the TERM_REFERENCE replacement, omit the initial "-". It winds up + in the final result somehow. + + Then, use transforms_from: + + transforms_from(\""" + update-no-updates-found = { REPLACE(source, "update.noUpdatesFound", about_replacements) } + \""", source=source, about_replacements=about_replacements) + + If doing multiple string migrations in a single transforms_from template, + your replacements dictionary can have multiple key, value pairs and be used + for all REPLACE transforms. + """ + + parser = FluentParser(with_spans=False) + resource = parser.parse(ftl) + return IntoTranforms(substitutions).visit(resource).body diff --git a/comm/third_party/python/fluent.migratetb/fluent/migratetb/merge.py b/comm/third_party/python/fluent.migratetb/fluent/migratetb/merge.py new file mode 100644 index 0000000000..b4575f0ca7 --- /dev/null +++ b/comm/third_party/python/fluent.migratetb/fluent/migratetb/merge.py @@ -0,0 +1,59 @@ +# coding=utf8 +from __future__ import unicode_literals +from __future__ import absolute_import + +import fluent.syntax.ast as FTL + +from .errors import SkipTransform +from .util import get_message, get_transform + + +def merge_resource(ctx, reference, current, transforms, in_changeset): + """Transform legacy translations into FTL. + + Use the `reference` FTL AST as a template. For each en-US string in the + reference, first check for an existing translation in the current FTL + `localization` and use it if it's present; then if the string has + a transform defined in the migration specification and if it's in the + currently processed changeset, evaluate the transform. 
@contextmanager
def dont_write_bytecode():
    """Context manager that sets `sys.dont_write_bytecode` to True.

    The previous value of the flag is restored when the block exits,
    including when the block raises.
    """
    previous = sys.dont_write_bytecode
    sys.dont_write_bytecode = True
    try:
        yield
    finally:
        # Without the finally, an exception in the managed block would
        # leave the process-wide flag switched on.
        sys.dont_write_bytecode = previous
def close(self):
    """Close the cached hglib client, if one was opened.

    The cached reference is dropped before closing, so calling this
    method again is a no-op and a later access to ``client`` opens a new
    connection instead of returning the closed one.
    """
    client, self._client = self._client, None
    if client is not None:
        client.close()
def cli():
    """Parse the command line and run the requested migrations.

    The positional arguments are names of Python modules; each one is
    imported and handed to `main` together with the locale, the reference
    and localization directories and the dry-run flag.
    """
    parser = argparse.ArgumentParser(
        description='Migrate translations to FTL.'
    )
    parser.add_argument(
        'migrations', metavar='MIGRATION', type=str, nargs='+',
        help='migrations to run (Python modules)'
    )
    parser.add_argument(
        '--locale', '--lang', type=str,
        help='target locale code (--lang is deprecated)'
    )
    parser.add_argument(
        '--reference-dir', type=str,
        help='directory with reference FTL files'
    )
    parser.add_argument(
        '--localization-dir', type=str,
        help='directory for localization files'
    )
    parser.add_argument(
        '--dry-run', action='store_true',
        help='do not write to disk nor commit any changes'
    )
    parser.set_defaults(dry_run=False)

    logger = logging.getLogger('migrate')
    logger.setLevel(logging.INFO)

    args = parser.parse_args()

    # Don't byte-compile migrations.
    # They're not our code, and infrequently run
    with dont_write_bytecode():
        # Import eagerly. On Python 3 `map` returns a lazy iterator, so the
        # modules would otherwise only be imported while main() iterates,
        # i.e. after this context manager has already exited.
        migrations = [
            importlib.import_module(name) for name in args.migrations
        ]

    main(
        locale=args.locale,
        reference_dir=args.reference_dir,
        localization_dir=args.localization_dir,
        migrations=migrations,
        dry_run=args.dry_run
    )
+ +The COPY, REPLACE and PLURALS Transforms inherit from Source which is a special +AST Node defining the location (the file path and the id) of the legacy +translation. During the migration, the current MigrationContext scans the +migration spec for Source nodes and extracts the information about all legacy +translations being migrated. For instance, + + COPY('file.dtd', 'hello') + +is equivalent to: + + FTL.Pattern([ + Source('file.dtd', 'hello') + ]) + +Sometimes it's useful to work with text rather than (path, key) source +definitions. This is the case when the migrated translation requires some +hardcoded text, e.g. <a> and </a> when multiple translations become a single +one with a DOM overlay. In such cases it's best to use FTL.TextElements: + + FTL.Message( + id=FTL.Identifier('update-failed'), + value=CONCAT( + COPY('aboutDialog.dtd', 'update.failed.start'), + FTL.TextElement('<a>'), + COPY('aboutDialog.dtd', 'update.failed.linkText'), + FTL.TextElement('</a>'), + COPY('aboutDialog.dtd', 'update.failed.end'), + ) + ) + +The REPLACE_IN_TEXT Transform also takes TextElements as input, making it +possible to pass it as the foreach function of the PLURALS Transform. In the +example below, each slice of the plural string is converted into a +TextElement by PLURALS and then run through the REPLACE_IN_TEXT transform. 
def extract_whitespace(regex, element):
    '''Split leading or trailing whitespace off a TextElement.

    `regex` is matched against `element.value` and must define the groups
    `whitespace`, `text` and `block_text`. The result is a tuple of
    (Placeable, TextElement): the Placeable wraps the extracted spaces in
    a StringLiteral and always comes first, the TextElement holds what is
    left. When the regex matched through `block_text` (text beginning or
    ending with a newline) the StringLiteral is empty. If the element
    consists of the extracted spaces only, the second item is None; if the
    regex does not match, the result is (None, element).
    '''
    found = re.search(regex, element.value)
    if not found:
        return None, element

    # No `whitespace` group means the newline alternative matched; that is
    # encoded as an empty { "" }.
    spaces = found.group('whitespace') or ''
    literal = FTL.Placeable(FTL.StringLiteral(spaces))
    if spaces == element.value:
        return literal, None

    # Either text or block_text matched the rest.
    remainder = found.group('text') or found.group('block_text')
    return literal, FTL.TextElement(remainder)
class FluentSource(Source):
    """Declare a Fluent source translation to be copied over.

    When evaluated, it clones the Pattern of the parsed source.

    Raises NotSupportedError when `path` does not end in ``.ftl`` or when
    `key` names a Term attribute (a key starting with ``-`` that contains
    a dot).
    """
    def __init__(self, path, key):
        if not path.endswith('.ftl'):
            raise NotSupportedError(
                'Please use COPY to migrate from legacy files '
                '({})'.format(path)
            )
        if key[0] == '-' and '.' in key:
            # The two literals are concatenated, so the first one needs
            # its trailing space to keep the words apart.
            raise NotSupportedError(
                'Cannot migrate from Term Attributes, as they are '
                'locale-dependent ({})'.format(path)
            )
        super(FluentSource, self).__init__(path, key)

    def __call__(self, ctx):
        """Return a clone of the source Pattern looked up through `ctx`."""
        pattern = ctx.get_fluent_source_pattern(self.path, self.key)
        return pattern.clone()
def visit_Placeable(self, node):
    """Unwrap a Placeable whose expression is a Pattern or PatternElement.

    The children are visited first. If the expression is then a Pattern
    or a PatternElement, the expression itself is returned in place of
    the Placeable; otherwise the visited Placeable is returned.
    """
    visited = self.generic_visit(node)
    inner = visited.expression
    if not isinstance(inner, (FTL.Pattern, FTL.PatternElement)):
        # Still a Placeable with an expression: keep it.
        return visited
    # Transforms could have replaced the expression; pass the replacement
    # through. Patterns then get flattened by visit_Pattern.
    return inner
PRINTF = re.compile(
    r'%(?P<good>%|'
    r'(?:(?P<number>[1-9][0-9]*)\$)?'
    r'(?P<width>\*|[0-9]+)?'
    r'(?P<prec>\.(?:\*|[0-9]+)?)?'
    r'(?P<spec>[duxXosScpfg]))'
)


def number():
    """Yield 1, 2, 3, ... without end."""
    current = 0
    while True:
        current += 1
        yield current


def normalize_printf(text):
    """Return `text` with every printf argument carrying a number.

    Gecko forbids mixing unnumbered and numbered arguments, so unnumbered
    ones are converted to numbered ones, counting from 1 in order of
    appearance. Arguments that already have a number are kept as they
    are. Arguments with a width of zero are removed, as they're intended
    to be removed from the output by the localizer. `%%` becomes `%`.
    """
    counter = number()

    def renumber(found):
        if found.group('good') == '%':
            return '%'
        zero_width = found.group('width') == '0'
        if found.group('number'):
            replacement = found.group()
        else:
            # An unnumbered argument uses up a number even when it is
            # dropped below.
            replacement = '%{}${}'.format(
                next(counter), found.group('spec')
            )
        return '' if zero_width else replacement

    return PRINTF.sub(renumber, text)
class REPLACE(LegacySource):
    """Create a Pattern with interpolations from given source.

    The translation value read from the given source is handed to the
    `REPLACE_IN_TEXT` transform together with `replacements`.
    """

    def __init__(
        self, path, key, replacements, **kwargs
    ):
        # An explicit normalize_printf keyword always wins. Without it,
        # normalization is switched on for .properties files only.
        if 'normalize_printf' in kwargs:
            use_normalization = kwargs.pop('normalize_printf')
        else:
            use_normalization = path.endswith('.properties')

        super(REPLACE, self).__init__(path, key, **kwargs)
        self.replacements = replacements
        self.normalize_printf = use_normalization

    def __call__(self, ctx):
        source_text = super(REPLACE, self).__call__(ctx)
        transform = REPLACE_IN_TEXT(
            source_text,
            self.replacements,
            normalize_printf=self.normalize_printf,
        )
        return transform(ctx)
def __call__(self, ctx):
    """Evaluate to a Pattern holding the migrated plural forms.

    The source text is split on ';' and each part is stripped. The parts
    are paired, in order, with `ctx.plural_categories`, and pairs with an
    empty form are dropped.

    - No pairs left: the result of `Transform.pattern_of()` with no
      arguments.
    - Exactly one pair: the form is run through `self.foreach`, evaluated
      and wrapped with `Transform.pattern_of`; no SelectExpression is
      created.
    - Otherwise: a SelectExpression on the evaluated `self.selector` with
      one variant per pair, sorted by `DEFAULT_ORDER`.

    Raises IndexError if none of `ctx.plural_categories` is listed in
    `DEFAULT_ORDER`.
    """
    element = super(PLURALS, self).__call__(ctx)
    selector = ctx.evaluate(self.selector)
    keys = ctx.plural_categories
    forms = [
        FTL.TextElement(part.strip())
        for part in element.value.split(';')
    ]

    # The default CLDR form should be the last we have in DEFAULT_ORDER,
    # usually `other`, but in some cases `many`. If we don't have a variant
    # for that, we'll append one, using the, in CLDR order, last existing
    # variant in the legacy translation. That may or may not be the last
    # variant.
    default_key = [
        key for key in reversed(self.DEFAULT_ORDER) if key in keys
    ][0]

    # Match keys to legacy forms in the order they are defined in Gecko's
    # PluralForm.jsm. Filter out empty forms.
    pairs = [
        (key, var)
        for key, var in zip(keys, forms)
        if var.value
    ]

    # A special case for legacy translations which don't define any
    # plural forms.
    if len(pairs) == 0:
        return Transform.pattern_of()

    # A special case for languages with one plural category or one legacy
    # variant. We don't need to insert a SelectExpression for them.
    if len(pairs) == 1:
        _, only_form = pairs[0]
        only_variant = ctx.evaluate(self.foreach(only_form))
        return Transform.pattern_of(only_variant)

    # Make sure the default key is defined. If it's missing, use the last
    # form (in CLDR order) found in the legacy translation.
    pairs.sort(key=lambda pair: self.DEFAULT_ORDER.index(pair[0]))
    last_key, last_form = pairs[-1]
    if last_key != default_key:
        pairs.append((default_key, last_form))

    def createVariant(key, form):
        # Run the legacy plural form through `foreach` which returns an
        # `FTL.Node` describing the transformation required for each
        # variant. Then evaluate it to a migrated FTL node.
        value = ctx.evaluate(self.foreach(form))
        return FTL.Variant(
            key=FTL.Identifier(key),
            value=value,
            # Only the variant for the default key is flagged as default.
            default=key == default_key
        )

    select = FTL.SelectExpression(
        selector=selector,
        variants=[
            createVariant(key, form)
            for key, form in pairs
        ]
    )

    return Transform.pattern_of(select)
def __call__(self, ctx):
    """Build one Pattern from the stored elements.

    All of `self.elements` are passed to `Transform.pattern_of`. `ctx` is
    not used by this method itself.
    """
    return Transform.pattern_of(*self.elements)
def ftl(code):
    """Nicer indentation for FTL code.

    Leading newlines are removed (the code might be triple-quoted) and
    the common indentation of the remaining lines is stripped. The result
    is meant to be compared against the output of the FTL Serializer; the
    input is expected to end with a newline, as the serializer's output
    does.
    """
    return textwrap.dedent(code.lstrip('\n'))
def _string_literal(value_node):
    """Return the text of a string-literal AST node, or None for others."""
    constant_type = getattr(ast, 'Constant', None)
    if constant_type is not None and isinstance(value_node, constant_type):
        # Python 3.8+ parses every literal into ast.Constant.
        text = value_node.value
        return text if isinstance(text, str) else None
    if type(value_node).__name__ == 'Str':
        # Older parsers produce ast.Str nodes. Matched by class name
        # because looking up ast.Str itself is deprecated on newer Pythons.
        return value_node.s
    return None


def process_assign(node, context):
    """Record the value of a simple assignment in `context`.

    `node` is an `ast.Assign`. Depending on its right-hand side, the value
    stored for every plain-name target is:

    - the text, for a string literal,
    - whatever `context` already holds for that name, for a name,
    - the `ast.Call` node itself, for a call.

    Any other right-hand side (numbers, dict or list literals, ...) and
    names unknown to `context` leave `context` untouched.
    """
    value = node.value
    # Default for right-hand sides that are not tracked; without it, `val`
    # would be unbound below for e.g. `x = 1`.
    val = None
    if isinstance(value, ast.Name):
        val = context.get(value.id)
    elif isinstance(value, ast.Call):
        val = value
    else:
        val = _string_literal(value)
    if val is None:
        return
    for target in node.targets:
        if isinstance(target, ast.Name):
            context[target.id] = val
@classmethod
def validate(cls, path, code=None):
    """Validate the migration recipe at `path`.

    If `code` is not given, it is read from `path`. An instance is created
    from the code and the path, and the result of its `inspect()` method
    is returned.
    """
    if code is None:
        # Read raw bytes: the parser then applies the file's own encoding
        # declaration (UTF-8 when there is none) instead of whatever the
        # locale's preferred encoding happens to be.
        with open(path, 'rb') as fh:
            code = fh.read()
    validator = cls(code, path)
    return validator.inspect()
def visit_Attribute(self, node):
    """Reject attribute access on the context variable that isn't allowed.

    Only `add_transforms` and `locale` may be accessed on the name stored
    in `self.ctx_var`; anything else raises BadContextAPIException.
    Attribute access on any other object is not checked. The node's
    children are visited afterwards.
    """
    owner = node.value
    on_context = isinstance(owner, ast.Name) and owner.id == self.ctx_var
    if on_context and node.attr not in ('add_transforms', 'locale'):
        raise BadContextAPIException(
            'Unexpected attribute access on {}.{}'.format(
                self.ctx_var, node.attr
            )
        )
    self.generic_visit(node)
def call_transform(self, node, dotted):
    """Check the first two arguments of a call to a Source transform.

    `dotted` is the fully qualified name of the called object. Calls to
    anything outside `fluent.migratetb` / `fluent.migratetb.transforms`,
    and to names that are not subclasses of `transforms.Source`, are
    ignored. For Source transforms, an issue is recorded in `self.issues`
    unless the call has at least two positional arguments that are string
    literals or names, and the first one resolves to a path.
    """
    module, called = dotted.rsplit('.', 1)
    if module not in ('fluent.migratetb', 'fluent.migratetb.transforms'):
        return
    # The recipe may call something the transforms module does not define,
    # or something that is not a class (a function, a constant); neither
    # is a Source transform, and neither should crash the validator.
    transform = getattr(transforms, called, None)
    if not (
        isinstance(transform, type)
        and issubclass(transform, transforms.Source)
    ):
        return
    bad_args = '{} takes path and key as first two params'.format(called)
    if not self.check_arguments(
        node, ((ast.Str, ast.Name), (ast.Str, ast.Name),),
        allow_more=True, check_kwargs=False
    ):
        self.issues.append({
            'msg': bad_args,
            'line': node.lineno
        })
        return
    path = node.args[0]
    if isinstance(path, ast.Str):
        path = path.s
    if isinstance(path, ast.Name):
        # Names are resolved through the assignments collected so far;
        # unknown names resolve to None and are reported below.
        path = self.global_assigns.get(path.id)
    if not isinstance(path, PATH_TYPES):
        self.issues.append({
            'msg': bad_args,
            'line': node.lineno
        })
def check_arguments(
    self, node, argspec, check_kwargs=True, allow_more=False
):
    """Tell whether the positional arguments of a call match `argspec`.

    `argspec` is a sequence with one entry per expected positional
    argument, each a type (or tuple of types) usable with isinstance().
    Returns False when

    - `check_kwargs` is set and the call has keyword arguments,
    - the call node has a non-empty `starargs` attribute,
    - an argument is missing or has the wrong node type,
    - there are more arguments than `argspec` entries and `allow_more`
      is not set.
    """
    if check_kwargs:
        if node.keywords or getattr(node, 'kwargs', None):
            return False
    if getattr(node, 'starargs', None):
        return False
    for actual, expected in zip_longest(node.args, argspec):
        if expected is None:
            # The call has more arguments than the spec describes.
            return bool(allow_more)
        if not isinstance(actual, expected):
            return False
    return True