summaryrefslogtreecommitdiffstats
path: root/third_party/python/fluent.migrate
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/python/fluent.migrate
parentInitial commit. (diff)
downloadfirefox-upstream/124.0.1.tar.xz
firefox-upstream/124.0.1.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/fluent.migrate')
-rw-r--r--third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/LICENSE13
-rw-r--r--third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/METADATA63
-rw-r--r--third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/RECORD21
-rw-r--r--third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/WHEEL6
-rw-r--r--third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/entry_points.txt3
-rw-r--r--third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/top_level.txt1
-rw-r--r--third_party/python/fluent.migrate/fluent/__init__.py1
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/__init__.py8
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/_context.py351
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/blame.py77
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/changesets.py66
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/context.py160
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/errors.py22
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/evaluator.py28
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/helpers.py148
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/merge.py51
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/repo_client.py106
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/tool.py184
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/transforms.py558
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/util.py107
-rw-r--r--third_party/python/fluent.migrate/fluent/migrate/validator.py323
21 files changed, 2297 insertions, 0 deletions
diff --git a/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/LICENSE b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/LICENSE
new file mode 100644
index 0000000000..f6a01a51d0
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/LICENSE
@@ -0,0 +1,13 @@
+Copyright 2016 Mozilla Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/METADATA b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/METADATA
new file mode 100644
index 0000000000..c93563cd16
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/METADATA
@@ -0,0 +1,63 @@
+Metadata-Version: 2.1
+Name: fluent.migrate
+Version: 0.13.0
+Summary: Toolchain to migrate legacy translation to Fluent.
+Home-page: https://github.com/mozilla/fluent-migrate
+Author: Mozilla
+Author-email: l10n-drivers@mozilla.org
+License: APL 2
+Keywords: fluent,localization,l10n
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: POSIX
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: compare-locales <10.0,>=9.0.1
+Requires-Dist: fluent.syntax <0.20,>=0.19.0
+Provides-Extra: hg
+Requires-Dist: python-hglib ; extra == 'hg'
+
+Fluent Migration Tools
+======================
+
+Programmatically create Fluent files from existing content in both legacy
+and Fluent formats. Use recipes written in Python to migrate content for each
+of your localizations.
+
+`migrate-l10n` is a CLI script which uses the `fluent.migrate` module under
+the hood to run migrations on existing translations.
+
+`validate-l10n-recipe` is a CLI script to test a migration recipe for common
+errors, without trying to apply it.
+
+Installation
+------------
+
+Install from PyPI:
+
+ pip install fluent.migrate[hg]
+
+If you only want to use the `MigrationContext` API, you can drop the
+requirement on `python-hglib`:
+
+ pip install fluent.migrate
+
+Usage
+-----
+
+Migrations consist of _recipes_, which are applied to a _localization repository_, based on _template files_.
+You can find recipes for Firefox in `mozilla-central/python/l10n/fluent_migrations/`,
+the reference repository is [gecko-strings](https://hg.mozilla.org/l10n/gecko-strings/) or _quarantine_.
+You apply those migrations to l10n repositories in [l10n-central](https://hg.mozilla.org/l10n-central/), or to `gecko-strings` for testing.
+
+The migrations are run as python modules, so you need to have their file location in `PYTHONPATH`.
+
+An example would look like
+
+ $ migrate-l10n --lang it --reference-dir gecko-strings --localization-dir l10n-central/it bug_1451992_preferences_sitedata bug_1451992_preferences_translation
diff --git a/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/RECORD b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/RECORD
new file mode 100644
index 0000000000..f6107a7795
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/RECORD
@@ -0,0 +1,21 @@
+fluent/__init__.py,sha256=ED6jHcYiuYpr_0vjGz0zx2lrrmJT9sDJCzIljoDfmlM,65
+fluent/migrate/__init__.py,sha256=N1wyurR01T2hdcUhMuvdU9W413HQyw9gi0VJP6dTlm4,129
+fluent/migrate/_context.py,sha256=2NCpsBmG7QzwF33cRcrhzavAW0DYsn5Zyvs1mTpg2YI,12890
+fluent/migrate/blame.py,sha256=r3a9Zjc2SxAKSLcLQH4YmybbDpyRaYIaC3rEabmxqF8,2493
+fluent/migrate/changesets.py,sha256=KKkNk4Ga1rq9QXH5mdb-iy6P86CbXUrUJNhMJQG1s9g,1777
+fluent/migrate/context.py,sha256=ORIO46pTDKKq1z_mpM_E-nQiKzkjOVj8_jlAWiEHYQE,6307
+fluent/migrate/errors.py,sha256=s7JjvA2yCWogO-Ta4OV3z_Ab31-V_ha_3LGyxF46SRk,313
+fluent/migrate/evaluator.py,sha256=NhLfdlSo1zKBNDS54sa-Xz67CjNYCnAYHRsBx2Gwj2Q,859
+fluent/migrate/helpers.py,sha256=8jFxbqMuMYOwGrmtdLv8p46QKh_kGEFAcyn2BNQC4Ps,5150
+fluent/migrate/merge.py,sha256=J9DL-QUoBL3n9UTObhhETq47bCYSsHcW9F_ZIomrwak,1808
+fluent/migrate/repo_client.py,sha256=hZvfD1P-ZOoM6u-aMQ4hNzBtMlcjevZERLfizjcgDWo,3541
+fluent/migrate/tool.py,sha256=hGHq4N7gVxNllVrXQiqiCktzYAiTUMKQIDovAQXCMjE,5759
+fluent/migrate/transforms.py,sha256=aCKY-fGJBv3e5rTBfLYKCo0urzHUjtHpejt0H5Vlors,20689
+fluent/migrate/util.py,sha256=7n0pjmbvyJq7GrWV1gatDj7BYP7amY1S4UfugptWxwk,2853
+fluent/migrate/validator.py,sha256=SpjTfaKvH8ZN7ZKuoJCEWIp3xXEyplzN6vF23piXSGE,11043
+fluent.migrate-0.13.0.dist-info/LICENSE,sha256=yC8xgAJuBJQ0ThoBNcQnXzmBUYVh5xfk3rMDaXQ8gO4,559
+fluent.migrate-0.13.0.dist-info/METADATA,sha256=-mqYB_hRmQqgLT9EyWfNO85wJvKaz3AvY5K1r-jcsZg,2363
+fluent.migrate-0.13.0.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
+fluent.migrate-0.13.0.dist-info/entry_points.txt,sha256=q0mh-Wn0Z8L4j7xyyQhxLDw5yxAMDvSzMgm2uWjIBK8,109
+fluent.migrate-0.13.0.dist-info/top_level.txt,sha256=E6y0EXb_8ntRq2470rEss448Ec6wP_-DI3zVECukrn0,7
+fluent.migrate-0.13.0.dist-info/RECORD,,
diff --git a/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/WHEEL b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/WHEEL
new file mode 100644
index 0000000000..4724c45738
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/entry_points.txt b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/entry_points.txt
new file mode 100644
index 0000000000..e437e9ecf9
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+migrate-l10n = fluent.migrate.tool:cli
+validate-l10n-recipe = fluent.migrate.validator:cli
diff --git a/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/top_level.txt b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/top_level.txt
new file mode 100644
index 0000000000..a3582d405a
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent.migrate-0.13.0.dist-info/top_level.txt
@@ -0,0 +1 @@
+fluent
diff --git a/third_party/python/fluent.migrate/fluent/__init__.py b/third_party/python/fluent.migrate/fluent/__init__.py
new file mode 100644
index 0000000000..8db66d3d0f
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/__init__.py b/third_party/python/fluent.migrate/fluent/migrate/__init__.py
new file mode 100644
index 0000000000..158277a0a2
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/__init__.py
@@ -0,0 +1,8 @@
+from .transforms import ( # noqa: F401
+ CONCAT,
+ COPY,
+ COPY_PATTERN,
+ PLURALS,
+ REPLACE,
+ REPLACE_IN_TEXT,
+)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/_context.py b/third_party/python/fluent.migrate/fluent/migrate/_context.py
new file mode 100644
index 0000000000..34a23cde67
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/_context.py
@@ -0,0 +1,351 @@
+from __future__ import annotations
+from typing import Dict, Optional, Set, Tuple, cast
+
+import os
+import codecs
+from functools import partial
+import logging
+from itertools import zip_longest
+
+from compare_locales.parser import getParser
+from compare_locales.plurals import get_plural
+import fluent.syntax.ast as FTL
+from fluent.syntax.parser import FluentParser
+from fluent.syntax.serializer import FluentSerializer
+
+from .changesets import Changes
+from .errors import UnreadableReferenceError
+from .evaluator import Evaluator
+from .merge import merge_resource
+from .transforms import Source
+
+
+class InternalContext:
+ """Internal context for merging translation resources.
+
+ For the public interface, see `context.MigrationContext`.
+ """
+
+ dependencies: Dict[Tuple[str, str], Set[Tuple[str, Source]]] = {}
+ localization_dir: str
+ reference_dir: str
+
+ def __init__(self, lang, enforce_translated=False):
+ self.fluent_parser = FluentParser(with_spans=False)
+ self.fluent_serializer = FluentSerializer()
+
+ # An iterable of plural category names relevant to the context's
+ # language. E.g. ('one', 'other') for English.
+ self.plural_categories = get_plural(lang)
+ if self.plural_categories is None:
+ logger = logging.getLogger("migrate")
+ logger.warning(
+ f'Plural rule for "{lang}" is not defined in "compare-locales"'
+ )
+ self.plural_categories = ("one", "other")
+
+ self.enforce_translated = enforce_translated
+ # Parsed input resources stored by resource path.
+ self.reference_resources = {}
+ self.localization_resources = {}
+ self.target_resources = {}
+
+ # A dict mapping target paths to lists of `FTL.Message` or `FTL.Term`
+ # objects, some of whose nodes can be transform operations.
+ self.transforms = {}
+
+ # The evaluator instance is an AST transformer capable of walking an
+ # AST hierarchy and evaluating nodes which are migration Transforms.
+ self.evaluator = Evaluator(self)
+
+ def read_ftl_resource(self, path: str):
+ """Read an FTL resource and parse it into an AST."""
+ f = codecs.open(path, "r", "utf8")
+ try:
+ contents = f.read()
+ except UnicodeDecodeError as err:
+ logger = logging.getLogger("migrate")
+ logger.warning(f"Unable to read file {path}: {err}")
+ raise err
+ finally:
+ f.close()
+
+ ast = self.fluent_parser.parse(contents)
+
+ annots = [
+ annot
+ for entry in ast.body
+ if isinstance(entry, FTL.Junk)
+ for annot in entry.annotations
+ ]
+
+ if len(annots):
+ logger = logging.getLogger("migrate")
+ for annot in annots:
+ msg = annot.message
+ logger.warning(f"Syntax error in {path}: {msg}")
+
+ return ast
+
+ def read_legacy_resource(self, path: str):
+ """Read a legacy resource and parse it into a dict."""
+ parser = getParser(path)
+ parser.readFile(path)
+ # Transform the parsed result which is an iterator into a dict.
+ return {
+ entity.key: entity.val
+ for entity in parser
+ if entity.localized or self.enforce_translated
+ }
+
+ def read_reference_ftl(self, path: str):
+ """Read and parse a reference FTL file.
+
+ A missing resource file is a fatal error and will raise an
+ UnreadableReferenceError.
+ """
+ fullpath = os.path.join(self.reference_dir, path)
+ try:
+ return self.read_ftl_resource(fullpath)
+ except OSError:
+ error_message = f"Missing reference file: {fullpath}"
+ logging.getLogger("migrate").error(error_message)
+ raise UnreadableReferenceError(error_message)
+ except UnicodeDecodeError as err:
+ error_message = f"Error reading file {fullpath}: {err}"
+ logging.getLogger("migrate").error(error_message)
+ raise UnreadableReferenceError(error_message)
+
+ def read_localization_ftl(self, path: str):
+ """Read and parse an existing localization FTL file.
+
+ Create a new FTL.Resource if the file doesn't exist or can't be
+ decoded.
+ """
+ fullpath = os.path.join(self.localization_dir, path)
+ try:
+ return self.read_ftl_resource(fullpath)
+ except OSError:
+ logger = logging.getLogger("migrate")
+ logger.info(
+ "Localization file {} does not exist and "
+ "it will be created".format(path)
+ )
+ return FTL.Resource()
+ except UnicodeDecodeError:
+ logger = logging.getLogger("migrate")
+ logger.warning(
+ "Localization file {} has broken encoding. "
+ "It will be re-created and some translations "
+ "may be lost".format(path)
+ )
+ return FTL.Resource()
+
+ def maybe_add_localization(self, path: str):
+ """Add a localization resource to migrate translations from.
+
+ Uses a compare-locales parser to create a dict of (key, string value)
+ tuples.
+ For Fluent sources, we store the AST.
+ """
+ try:
+ fullpath = os.path.join(self.localization_dir, path)
+ if not fullpath.endswith(".ftl"):
+ collection = self.read_legacy_resource(fullpath)
+ else:
+ collection = self.read_ftl_resource(fullpath)
+ except OSError:
+ logger = logging.getLogger("migrate")
+ logger.warning(f"Missing localization file: {path}")
+ else:
+ self.localization_resources[path] = collection
+
+ def get_legacy_source(self, path: str, key: str):
+ """Get an entity value from a localized legacy source.
+
+ Used by the `Source` transform.
+ """
+ resource = self.localization_resources[path]
+ return resource.get(key, None)
+
+ def get_fluent_source_pattern(self, path: str, key: str):
+ """Get a pattern from a localized Fluent source.
+
+ If the key contains a `.`, does an attribute lookup.
+ Used by the `COPY_PATTERN` transform.
+ """
+ resource = self.localization_resources[path]
+ msg_key, _, attr_key = key.partition(".")
+ found = None
+ for entry in resource.body:
+ if isinstance(entry, (FTL.Message, FTL.Term)):
+ if entry.id.name == msg_key:
+ found = entry
+ break
+ if found is None:
+ return None
+ if not attr_key:
+ return found.value
+ for attribute in found.attributes:
+ if attribute.id.name == attr_key:
+ return attribute.value
+ return None
+
+ def messages_equal(self, res1, res2):
+ """Compare messages and terms of two FTL resources.
+
+ Uses FTL.BaseNode.equals to compare all messages/terms
+ in two FTL resources.
+ If the number of messages differs, the result is also False (order is ignored).
+ """
+
+ def message_id(message):
+ "Return the message's identifier name for sorting purposes."
+ return message.id.name
+
+ messages1 = sorted(
+ (
+ entry
+ for entry in res1.body
+ if isinstance(entry, FTL.Message) or isinstance(entry, FTL.Term)
+ ),
+ key=message_id,
+ )
+ messages2 = sorted(
+ (
+ entry
+ for entry in res2.body
+ if isinstance(entry, FTL.Message) or isinstance(entry, FTL.Term)
+ ),
+ key=message_id,
+ )
+ for msg1, msg2 in zip_longest(messages1, messages2):
+ if msg1 is None or msg2 is None:
+ return False
+ if not msg1.equals(msg2):
+ return False
+ return True
+
+ def merge_changeset(
+ self,
+ changeset: Optional[Changes] = None,
+ known_translations: Optional[Changes] = None,
+ ):
+ """Return a generator of FTL ASTs for the changeset.
+
+ The input data must be configured earlier using the `add_*` methods.
+ If given, `changeset` must be a set of (path, key) tuples describing
+ which legacy translations are to be merged. If `changeset` is None,
+ all legacy translations will be allowed to be migrated in a single
+ changeset.
+
+ We use the `in_changeset` method to determine if a message should be
+ migrated for the given changeset.
+
+ Given `changeset`, yield `(path, FTL.Resource)` pairs for each resource
+ whose messages changed. The yielded resources will also be used to
+ update this context's target resources.
+ """
+
+ if changeset is None:
+ # Merge all known legacy translations. Used in tests.
+ changeset = {
+ (path, key)
+ for path, strings in self.localization_resources.items()
+ if not path.endswith(".ftl")
+ for key in strings.keys()
+ }
+
+ if known_translations is None:
+ known_translations = changeset
+
+ for path, reference in self.reference_resources.items():
+ current = self.target_resources[path]
+ transforms = self.transforms.get(path, [])
+ in_changeset = partial(
+ self.in_changeset, changeset, known_translations, path
+ )
+
+ # Merge legacy translations with the existing ones using the
+ # reference as a template.
+ snapshot = merge_resource(
+ self, reference, current, transforms, in_changeset
+ )
+
+ # Skip this path if the messages in the merged snapshot are
+ # identical to those in the current state of the localization file.
+ # This may happen when:
+ #
+ # - none of the transforms is in the changeset, or
+ # - all messages which would be migrated by the context's
+ # transforms already exist in the current state.
+ if self.messages_equal(current, snapshot):
+ continue
+
+ # Store the merged snapshot on the context so that the next merge
+ # already takes it into account as the existing localization.
+ self.target_resources[path] = snapshot
+
+ # The result for this path is a complete `FTL.Resource`.
+ yield path, snapshot
+
+ def in_changeset(
+ self, changeset: Changes, known_translations: Changes, path: str, ident
+ ) -> bool:
+ """Check if a message should be migrated in this changeset.
+
+ The message is identified by path and ident.
+
+
+ A message will be migrated only if all of its dependencies
+ are present in the currently processed changeset.
+
+ If a transform defined for this message points to a missing
+ legacy translation, this message will not be merged. The
+ missing legacy dependency won't be present in the changeset.
+
+ This also means that partially translated messages (e.g.
+ constructed from two legacy strings out of which only one is
+ available) will never be migrated.
+ """
+ message_deps = self.dependencies.get((path, ident), None)
+
+ # Don't merge if we don't have a transform for this message.
+ if message_deps is None:
+ return False
+
+ # As a special case, if a transform exists but has no
+ # dependencies, it's a hardcoded `FTL.Node` which doesn't
+ # migrate any existing translation but rather creates a new
+ # one. Merge it.
+ if len(message_deps) == 0:
+ return True
+
+ # Make sure all the dependencies are present in the current
+ # changeset. Partial migrations are not currently supported.
+ # See https://bugzilla.mozilla.org/show_bug.cgi?id=1321271
+ # We only return True if our current changeset touches
+ # the transform, and we have all of the dependencies.
+ active_deps = cast(bool, message_deps & changeset)
+ available_deps = message_deps & known_translations
+ return active_deps and message_deps == available_deps
+
+ def serialize_changeset(
+ self, changeset: Changes, known_translations: Optional[Changes] = None
+ ):
+ """Return a dict of serialized FTLs for the changeset.
+
+ Given `changeset`, return a dict whose keys are resource paths and
+ values are serialized FTL snapshots.
+ """
+
+ return {
+ path: self.fluent_serializer.serialize(snapshot)
+ for path, snapshot in self.merge_changeset(changeset, known_translations)
+ }
+
+ def evaluate(self, node):
+ return self.evaluator.visit(node)
+
+
+logging.basicConfig()
diff --git a/third_party/python/fluent.migrate/fluent/migrate/blame.py b/third_party/python/fluent.migrate/fluent/migrate/blame.py
new file mode 100644
index 0000000000..7ea505edaf
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/blame.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+from typing import Dict, Iterable, Tuple, TypedDict, cast
+
+import argparse
+import json
+from os.path import join
+
+from compare_locales.parser import Junk, getParser
+from compare_locales.parser.fluent import FluentEntity
+
+from .repo_client import RepoClient
+
+BlameData = Dict[str, Dict[str, Tuple[int, float]]]
+"File path -> message key -> [userid, timestamp]"
+
+
+class BlameResult(TypedDict):
+ authors: list[str]
+ blame: BlameData
+
+
+class Blame:
+ def __init__(self, client: RepoClient):
+ self.client = client
+ self.users: list[str] = []
+ self.blame: BlameData = {}
+
+ def attribution(self, file_paths: Iterable[str]) -> BlameResult:
+ for file in file_paths:
+ blame = self.client.blame(file)
+ self.handleFile(file, blame)
+ return {"authors": self.users, "blame": self.blame}
+
+ def handleFile(self, path: str, file_blame: list[Tuple[str, int]]):
+ try:
+ parser = getParser(path)
+ except UserWarning:
+ return
+
+ self.blame[path] = {}
+
+ self.readFile(parser, path)
+ entities = parser.parse()
+ for e in entities:
+ if isinstance(e, Junk):
+ continue
+ if e.val_span:
+ key_vals: list[tuple[str, str]] = [(e.key, e.val_span)]
+ else:
+ key_vals = []
+ if isinstance(e, FluentEntity):
+ key_vals += [
+ (f"{e.key}.{attr.key}", cast(str, attr.val_span))
+ for attr in e.attributes
+ ]
+ for key, (val_start, val_end) in key_vals:
+ entity_lines = file_blame[
+ (e.ctx.linecol(val_start)[0] - 1) : e.ctx.linecol(val_end)[0]
+ ]
+ user, timestamp = max(entity_lines, key=lambda x: x[1])
+ if user not in self.users:
+ self.users.append(user)
+ userid = self.users.index(user)
+ self.blame[path][key] = (userid, timestamp)
+
+ def readFile(self, parser, path: str):
+ parser.readFile(join(self.client.root, path))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("repo_path")
+ parser.add_argument("file_path", nargs="+")
+ args = parser.parse_args()
+ blame = Blame(RepoClient(args.repo_path))
+ attrib = blame.attribution(args.file_path)
+ print(json.dumps(attrib, indent=4, separators=(",", ": ")))
diff --git a/third_party/python/fluent.migrate/fluent/migrate/changesets.py b/third_party/python/fluent.migrate/fluent/migrate/changesets.py
new file mode 100644
index 0000000000..e687175550
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/changesets.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+from typing import Set, Tuple, TypedDict
+
+import time
+
+from .blame import BlameResult
+
+Changes = Set[Tuple[str, str]]
+
+
+class Changeset(TypedDict):
+ author: str
+ first_commit: float
+ changes: Changes
+
+
+def by_first_commit(item: Changeset):
+ """Sort key: return a changeset's first commit date."""
+ return item["first_commit"]
+
+
+def convert_blame_to_changesets(blame_json: BlameResult) -> list[Changeset]:
+ """Convert a blame dict into a list of changesets.
+
+ The blame information in `blame_json` should be a dict of the following
+ structure:
+
+ {
+ 'authors': [
+ 'A.N. Author <author@example.com>',
+ ],
+ 'blame': {
+ 'path/one': {
+ 'key1': [0, 1346095921.0],
+ },
+ }
+ }
+
+ It will be transformed into a list of changesets which can be fed into
+ `InternalContext.serialize_changeset`:
+
+ [
+ {
+ 'author': 'A.N. Author <author@example.com>',
+ 'first_commit': 1346095921.0,
+ 'changes': {
+ ('path/one', 'key1'),
+ }
+ },
+ ]
+
+ """
+ now = time.time()
+ changesets: list[Changeset] = [
+ {"author": author, "first_commit": now, "changes": set()}
+ for author in blame_json["authors"]
+ ]
+
+ for path, keys_info in blame_json["blame"].items():
+ for key, (author_index, timestamp) in keys_info.items():
+ changeset = changesets[author_index]
+ changeset["changes"].add((path, key))
+ if timestamp < changeset["first_commit"]:
+ changeset["first_commit"] = timestamp
+
+ return sorted(changesets, key=by_first_commit)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/context.py b/third_party/python/fluent.migrate/fluent/migrate/context.py
new file mode 100644
index 0000000000..8d32ab2c0a
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/context.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+from typing import List, Set, Tuple, cast
+
+import logging
+
+import fluent.syntax.ast as FTL
+from fluent.migrate.util import fold
+
+from .transforms import Source
+from .util import get_message, skeleton
+from .errors import (
+ EmptyLocalizationError,
+ UnreadableReferenceError,
+)
+from ._context import InternalContext
+
+
+__all__ = [
+ "EmptyLocalizationError",
+ "UnreadableReferenceError",
+ "MigrationContext",
+]
+
+
+class MigrationContext(InternalContext):
+ """Stateful context for merging translation resources.
+
+ `MigrationContext` must be configured with the target locale and the
+ directory locations of the input data.
+
+ The transformation takes four types of input data:
+
+ - The en-US FTL reference files which will be used as templates for
+ message order, comments and sections. If the reference_dir is None,
+ the migration will create Messages and Terms in the order given by
+ the transforms.
+
+ - The current FTL files for the given locale.
+
+ - A list of `FTL.Message` or `FTL.Term` objects some of whose nodes
+ are special helper or transform nodes:
+
+ helpers: VARIABLE_REFERENCE, MESSAGE_REFERENCE, TERM_REFERENCE
+ transforms: COPY, REPLACE_IN_TEXT, REPLACE, PLURALS, CONCAT
+ fluent value helper: COPY_PATTERN
+
+ The legacy (DTD, properties) translation files are deduced by the
+ dependencies in the transforms. The translations from these files will be
+ read from the localization_dir and transformed into FTL and merged
+ into the existing FTL files for the given language.
+ """
+
+ def __init__(
+ self,
+ locale: str,
+ reference_dir: str,
+ localization_dir: str,
+ enforce_translated=False,
+ ):
+ super().__init__(
+ locale,
+ enforce_translated=enforce_translated,
+ )
+ self.locale = locale
+ # Paths to directories with input data, relative to CWD.
+ self.reference_dir = reference_dir
+ self.localization_dir = localization_dir
+
+ self.dependencies = {}
+ """
+ A dict whose keys are `(path, key)` tuples corresponding to target
+ FTL translations, and values are sets of `(path, key)` tuples
+ corresponding to localized entities which will be migrated.
+ """
+
+ def add_transforms(
+ self, target: str, reference: str, transforms: List[FTL.Message | FTL.Term]
+ ):
+ """Define transforms for target using reference as template.
+
+ `target` is a path of the destination FTL file relative to the
+ localization directory. `reference` is a path to the template FTL
+ file relative to the reference directory.
+
+ Each transform is an extended FTL node with `Transform` nodes as some
+ values. Transforms are stored in their lazy AST form until
+ `merge_changeset` is called, at which point they are evaluated to real
+ FTL nodes with migrated translations.
+
+ Each transform is scanned for `Source` nodes which will be used to
+ build the list of dependencies for the transformed message.
+
+ For transforms that merely copy legacy messages or Fluent patterns,
+ using `fluent.migrate.helpers.transforms_from` is recommended.
+ """
+
+ def get_sources(acc, cur):
+ if isinstance(cur, Source):
+ acc.add((cur.path, cur.key))
+ return acc
+
+ if self.reference_dir is None:
+ # Add skeletons to resource body for each transform
+ # if there's no reference.
+ reference_ast = self.reference_resources.get(target)
+ if reference_ast is None:
+ reference_ast = FTL.Resource()
+ reference_ast.body.extend(skeleton(transform) for transform in transforms)
+ else:
+ reference_ast = self.read_reference_ftl(reference)
+ self.reference_resources[target] = reference_ast
+
+ for node in transforms:
+ ident = cast(str, node.id.name)
+ # Scan `node` for `Source` nodes and collect the information they
+ # store into a set of dependencies.
+ dependencies = cast(Set[Tuple[str, Source]], fold(get_sources, node, set()))
+ # Set these sources as dependencies for the current transform.
+ self.dependencies[(target, ident)] = dependencies
+
+ # The target Fluent message should exist in the reference file. If
+ # it doesn't, it's probably a typo.
+ # Of course, only if we're having a reference.
+ if self.reference_dir is None:
+ continue
+ if get_message(reference_ast.body, ident) is None:
+ logger = logging.getLogger("migrate")
+ logger.warning(
+ '{} "{}" was not found in {}'.format(
+ type(node).__name__, ident, reference
+ )
+ )
+
+ # Keep track of localization resource paths which were defined as
+ # sources in the transforms.
+ expected_paths = set()
+
+ # Read all legacy translation files defined in Source transforms. This
+ # may fail but a single missing legacy resource doesn't mean that the
+ # migration can't succeed.
+ for dependencies in self.dependencies.values():
+ for path in {path for path, _ in dependencies}:
+ expected_paths.add(path)
+ self.maybe_add_localization(path)
+
+ # However, if all legacy resources are missing, bail out early. There
+ # are no translations to migrate. We'd also get errors in hg annotate.
+ if len(expected_paths) > 0 and len(self.localization_resources) == 0:
+ error_message = "No localization files were found"
+ logging.getLogger("migrate").error(error_message)
+ raise EmptyLocalizationError(error_message)
+
+ # Add the current transforms to any other transforms added earlier for
+ # this path.
+ path_transforms = self.transforms.setdefault(target, [])
+ path_transforms += transforms
+
+ if target not in self.target_resources:
+ target_ast = self.read_localization_ftl(target)
+ self.target_resources[target] = target_ast
diff --git a/third_party/python/fluent.migrate/fluent/migrate/errors.py b/third_party/python/fluent.migrate/fluent/migrate/errors.py
new file mode 100644
index 0000000000..dcc3025377
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/errors.py
@@ -0,0 +1,22 @@
+class SkipTransform(RuntimeError):
+ pass
+
+
+class MigrationError(ValueError):
+ pass
+
+
+class EmptyLocalizationError(MigrationError):
+ pass
+
+
+class NotSupportedError(MigrationError):
+ pass
+
+
+class UnreadableReferenceError(MigrationError):
+ pass
+
+
+class InvalidTransformError(MigrationError):
+ pass
diff --git a/third_party/python/fluent.migrate/fluent/migrate/evaluator.py b/third_party/python/fluent.migrate/fluent/migrate/evaluator.py
new file mode 100644
index 0000000000..90c626f933
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/evaluator.py
@@ -0,0 +1,28 @@
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Transformer
+
+from .transforms import Transform
+
+
class Evaluator(Transformer):
    """AST transformer which evaluates migration Transforms in place.

    Walks an AST hierarchy; every node which is a migration `Transform` is
    replaced with the result of calling it with the migration context.
    """

    def __init__(self, ctx):
        # The context handed to every Transform when it is evaluated.
        self.ctx = ctx

    def visit(self, node):
        """Visit `node`, evaluating it if it is a Transform."""
        # Non-AST values (strings, numbers, None...) pass through untouched.
        if not isinstance(node, FTL.BaseNode):
            return node

        # Children are always visited first: some transforms don't expect
        # other, unevaluated transforms among their children.
        visited = self.generic_visit(node)

        if isinstance(node, Transform):
            # With the children evaluated, evaluate the transform itself.
            return visited(self.ctx)
        return visited
diff --git a/third_party/python/fluent.migrate/fluent/migrate/helpers.py b/third_party/python/fluent.migrate/fluent/migrate/helpers.py
new file mode 100644
index 0000000000..2a221d5de6
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/helpers.py
@@ -0,0 +1,148 @@
+"""Fluent AST helpers.
+
+The functions defined in this module offer a shorthand for defining common AST
+nodes.
+
+They take a string argument and immediately return a corresponding AST node.
+(As opposed to Transforms which are AST nodes on their own and only return the
+migrated AST nodes when they are evaluated by a MigrationContext.) """
+
+from __future__ import annotations
+from typing import List
+
+from fluent.syntax import FluentParser, ast as FTL
+from fluent.syntax.visitor import Transformer
+from .transforms import Transform, CONCAT, COPY, COPY_PATTERN
+from .errors import NotSupportedError, InvalidTransformError
+
+
def VARIABLE_REFERENCE(name):
    """Create a VariableReference expression for the variable `name`."""
    identifier = FTL.Identifier(name)
    return FTL.VariableReference(id=identifier)
+
+
def MESSAGE_REFERENCE(name):
    """Create a MessageReference expression.

    A `name` of the form `message.attribute` produces a reference to that
    attribute of the message; otherwise the reference has no attribute.
    """
    attribute = None
    if "." in name:
        # Exactly one dot is expected; more than one fails the unpacking.
        name, attribute_name = name.split(".")
        attribute = FTL.Identifier(attribute_name)

    message_id = FTL.Identifier(name)
    return FTL.MessageReference(id=message_id, attribute=attribute)
+
+
def TERM_REFERENCE(name):
    """Create a TermReference expression for the term `name`."""
    identifier = FTL.Identifier(name)
    return FTL.TermReference(id=identifier)
+
+
class IntoTranforms(Transformer):
    """Convert a parsed FTL resource into nodes holding migration Transforms.

    `COPY(...)` and `COPY_PATTERN(...)` function references become the
    corresponding transforms and every Pattern is wrapped in `CONCAT`.
    """

    # Transforms which are applied automatically and must not be spelled out.
    IMPLICIT_TRANSFORMS = ("CONCAT",)
    # Transforms which cannot be expressed in the FTL shorthand.
    FORBIDDEN_TRANSFORMS = ("PLURALS", "REPLACE", "REPLACE_IN_TEXT")

    def __init__(self, substitutions):
        # Mapping of names usable as bare references inside COPY arguments.
        self.substitutions = substitutions

    def visit_Junk(self, node):
        """Reject input that the Fluent parser could not parse."""
        first_annotation = node.annotations[0]
        message = "Transform contains parse error: {}, at {}".format(
            first_annotation.message, first_annotation.span.start
        )
        raise InvalidTransformError(message)

    def visit_FunctionReference(self, node):
        """Turn COPY/COPY_PATTERN calls into transforms; reject others."""
        name = node.id.name
        if name in self.IMPLICIT_TRANSFORMS:
            raise NotSupportedError(
                "{} may not be used with transforms_from(). It runs "
                "implicitly on all Patterns anyways.".format(name)
            )
        if name in self.FORBIDDEN_TRANSFORMS:
            raise NotSupportedError(
                "{} may not be used with transforms_from(). It requires "
                "additional logic in Python code.".format(name)
            )

        transform_class = {"COPY": COPY, "COPY_PATTERN": COPY_PATTERN}.get(name)
        if transform_class is None:
            # Any other function is left in the AST as a regular reference.
            return self.generic_visit(node)

        # Named arguments are converted before positional ones.
        named = {
            arg.name.name: self.into_argument(arg.value)
            for arg in node.arguments.named
        }
        positional = [self.into_argument(arg) for arg in node.arguments.positional]
        return transform_class(*positional, **named)

    def visit_Placeable(self, node):
        """Unwrap a Placeable whose expression became a Transform.

        Transforms evaluate to Patterns, which are flattened as elements
        of Patterns in Transform.pattern_of, but only one level deep.
        """
        visited = self.generic_visit(node)
        expression = visited.expression
        return expression if isinstance(expression, Transform) else visited

    def visit_Pattern(self, node):
        """Swap the Pattern for a CONCAT of its elements.

        CONCAT is more accepting of its elements: it takes PatternElements,
        Expressions and other Patterns (e.g. from evaluated transforms).
        """
        visited = self.generic_visit(node)
        return CONCAT(*visited.elements)

    def into_argument(self, node):
        """Convert an AST node into an argument for a migration transform."""
        if isinstance(node, FTL.StringLiteral):
            # Fluent has no booleans; "True"/"False" literals stand in.
            booleans = {"True": True, "False": False}
            return booleans.get(node.value, node.value)

        if not isinstance(node, FTL.MessageReference):
            raise InvalidTransformError(
                "Invalid argument passed to COPY: {}".format(type(node).__name__)
            )

        # A bare identifier names one of the caller-provided substitutions.
        try:
            return self.substitutions[node.id.name]
        except KeyError:
            raise InvalidTransformError(
                "Unknown substitution in COPY: {}".format(node.id.name)
            )
+
+
def transforms_from(ftl, **substitutions) -> List[FTL.Message | FTL.Term]:
    """Parse FTL code into a list of entries holding migration Transforms.

    Inside placeables the FTL may call a fabricated COPY function, which is
    converted into the actual COPY migration transform:

        new-key = Hardcoded text { COPY("filepath.dtd", "string.key") }

    COPY arguments may also name one of this function's keyword arguments
    using the MessageReference syntax:

        transforms_from(\"""
        new-key = Hardcoded text { COPY(file_dtd, "string.key") }
        \""", file_dtd="very/long/path/to/a/file.dtd")

    """
    resource = FluentParser(with_spans=False).parse(ftl)
    converted = IntoTranforms(substitutions).visit(resource)
    return converted.body
diff --git a/third_party/python/fluent.migrate/fluent/migrate/merge.py b/third_party/python/fluent.migrate/fluent/migrate/merge.py
new file mode 100644
index 0000000000..921054a054
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/merge.py
@@ -0,0 +1,51 @@
+import fluent.syntax.ast as FTL
+
+from .errors import SkipTransform
+from .util import get_message, get_transform
+
+
def merge_resource(ctx, reference, current, transforms, in_changeset):
    """Transform legacy translations into FTL.

    The `reference` FTL AST serves as the template. For every entry in it,
    an existing translation from `current` wins if present. Otherwise, if
    `transforms` has a transform for the entry and `in_changeset` accepts
    its identifier, the transform is evaluated with `ctx`. Entries with
    neither are left out of the result.
    """

    def merge_entry(entry):
        # Standalone comments are always carried over.
        if isinstance(entry, FTL.BaseComment):
            return entry

        # Junk is never carried over.
        if isinstance(entry, FTL.Junk):
            return None

        ident = entry.id.name

        # Keeping already-present messages means consecutive merges build
        # supersets of translations rather than removing any.
        existing = get_message(current.body, ident)
        if existing is not None:
            return existing

        transform = get_transform(transforms, ident)

        # Only migrate messages that belong to the current changeset.
        if transform is None or not in_changeset(ident):
            return None

        # Fall back to the reference comment if the transform has none.
        if transform.comment is None:
            transform.comment = entry.comment

        try:
            return ctx.evaluate(transform)
        except SkipTransform:
            return None

    merged = []
    for reference_entry in reference.body:
        result = merge_entry(reference_entry)
        if result is not None:
            merged.append(result)
    return FTL.Resource(merged)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/repo_client.py b/third_party/python/fluent.migrate/fluent/migrate/repo_client.py
new file mode 100644
index 0000000000..4236bc4286
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/repo_client.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+from typing import Tuple
+
+import json
+from subprocess import run
+
+from os.path import isdir, join
+
+import hglib
+
+
def git(root: str, *args: str) -> str:
    """
    Run command-line git with `args` in the `root` directory.
    Raises an exception on any error, including a non-0 return code.
    Returns the command's stdout as a string.
    """
    command = ["git", *args]
    proc = run(command, capture_output=True, cwd=root, encoding="utf-8")
    if proc.returncode == 0:
        return proc.stdout
    # Prefer git's own error output; fall back to a generic message.
    raise Exception(proc.stderr or f"git command failed: {args}")
+
+
class RepoClient:
    """Uniform access to a Mercurial (hglib) or git (command line) checkout."""

    def __init__(self, root: str):
        self.root = root
        if isdir(join(root, ".hg")):
            self.hgclient = hglib.open(root, "utf-8")
            return
        if not isdir(join(root, ".git")):
            raise Exception(f"Unsupported repository: {root}")

        # A git checkout: no hg client, all commands go through `git()`.
        self.hgclient = None
        inside_work_tree = git(self.root, "rev-parse", "--is-inside-work-tree")
        if inside_work_tree != "true\n":
            raise Exception("git rev-parse failed")

    def close(self):
        """Close the hg client, if there is one."""
        if self.hgclient:
            self.hgclient.close()

    def blame(self, file: str) -> list[Tuple[str, int]]:
        "Return a list of (author, time) tuples for each line in `file`."
        if self.hgclient:
            annotate_args = hglib.util.cmdbuilder(
                b"annotate",
                file.encode("latin-1"),
                template="json",
                date=True,
                user=True,
                cwd=self.root,
            )
            annotated = json.loads(self.hgclient.rawcommand(annotate_args))
            return [
                (entry["user"], int(entry["date"][0]))
                for entry in annotated[0]["lines"]
            ]

        # git: walk the porcelain output. Header lines update the current
        # author and time; a tab-prefixed line is an actual content line.
        attributions: list[Tuple[str, int]] = []
        user = ""
        time = 0
        for line in git(self.root, "blame", "--porcelain", file).splitlines():
            if line.startswith("\t"):
                attributions.append((user, time))
            elif line.startswith("author-time "):
                time = int(line[len("author-time ") :])
            elif line.startswith("author-mail "):
                # Keep the separating space so this reads "name <mail>".
                user += line[len("author-mail") :]
            elif line.startswith("author "):
                user = line[len("author ") :]
        return attributions

    def commit(self, message: str, author: str):
        "Add and commit all work tree files"
        if not self.hgclient:
            git(self.root, "add", ".")
            git(self.root, "commit", f"--author={author}", f"--message={message}")
            return
        self.hgclient.commit(message, user=author.encode("utf-8"), addremove=True)

    def head(self) -> str:
        "Identifier for the most recent commit"
        if not self.hgclient:
            return git(self.root, "rev-parse", "HEAD").strip()
        return self.hgclient.tip().node.decode("utf-8")

    def log(self, from_commit: str, to_commit: str) -> list[str]:
        "Commit descriptions reachable from `to_commit` but not `from_commit`."
        if not self.hgclient:
            stdout = git(
                self.root,
                "log",
                "--pretty=format:%s",
                f"{from_commit}..{to_commit}",
            )
            return stdout.strip().splitlines()

        revisions = self.hgclient.log(f"{to_commit} % {from_commit}")
        return [revision.desc.decode("utf-8") for revision in revisions]
diff --git a/third_party/python/fluent.migrate/fluent/migrate/tool.py b/third_party/python/fluent.migrate/fluent/migrate/tool.py
new file mode 100644
index 0000000000..c5b33ef803
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/tool.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+from types import ModuleType
+from typing import Iterable, cast
+
+import argparse
+from contextlib import contextmanager
+import importlib
+import logging
+import os
+import sys
+
+from fluent.migrate.blame import Blame
+from fluent.migrate.changesets import Changes, convert_blame_to_changesets
+from fluent.migrate.context import MigrationContext
+from fluent.migrate.errors import MigrationError
+from fluent.migrate.repo_client import RepoClient
+
+
@contextmanager
def dont_write_bytecode():
    """Disable writing of bytecode files for the duration of the block.

    Sets `sys.dont_write_bytecode` to True on entry and restores the
    previous value on exit, including when the body raises.
    """
    previous = sys.dont_write_bytecode
    sys.dont_write_bytecode = True
    try:
        yield
    finally:
        # Without try/finally an exception in the `with` body would leave
        # the interpreter-wide flag set to True.
        sys.dont_write_bytecode = previous
+
+
class Migrator:
    """Run migrations for one locale and commit the results.

    The repository client for `localization_dir` is created lazily on first
    use, so constructing a Migrator does not touch the repository.
    """

    def __init__(
        self, locale: str, reference_dir: str, localization_dir: str, dry_run: bool
    ):
        self.locale = locale
        self.reference_dir = reference_dir
        self.localization_dir = localization_dir
        # When True, nothing is written to disk and nothing is committed.
        self.dry_run = dry_run
        self._client = None

    @property
    def client(self):
        """The cached RepoClient for `localization_dir`, created on demand."""
        if self._client is None:
            self._client = RepoClient(self.localization_dir)
        return self._client

    def close(self):
        """Close the repository client, if one was created."""
        if self._client is not None:
            self._client.close()

    def run(self, migration: ModuleType):
        """Run a single migration module and commit one changeset per author.

        A `MigrationError` raised while the migration registers its
        transforms is reported and the migration is skipped.
        """
        print("\nRunning migration {} for {}".format(migration.__name__, self.locale))

        # For each migration create a new context.
        ctx = MigrationContext(self.locale, self.reference_dir, self.localization_dir)

        try:
            # Add the migration spec.
            migration.migrate(ctx)
        except MigrationError as e:
            print(
                "  Skipping migration {} for {}:\n    {}".format(
                    migration.__name__, self.locale, e
                )
            )
            return

        # Keep track of how many changesets we're committing.
        index = 0
        # The docstring of `migrate` is the commit message template.
        description_template = cast(str, migration.migrate.__doc__)

        # Annotate localization files used as sources by this migration
        # to preserve attribution of translations.
        files = ctx.localization_resources.keys()
        blame = Blame(self.client).attribution(files)
        changesets = convert_blame_to_changesets(blame)
        known_legacy_translations = set()

        for changeset in changesets:
            snapshot = self.snapshot(
                ctx, changeset["changes"], known_legacy_translations
            )
            if not snapshot:
                continue
            self.serialize_changeset(snapshot)
            index += 1
            self.commit_changeset(description_template, changeset["author"], index)

    def snapshot(
        self,
        ctx: MigrationContext,
        changes_in_changeset: Changes,
        known_legacy_translations: Changes,
    ):
        """Run the migration for the changeset, with the set of
        this and all prior legacy translations.

        `known_legacy_translations` is updated in place.
        """
        known_legacy_translations.update(changes_in_changeset)
        return ctx.serialize_changeset(changes_in_changeset, known_legacy_translations)

    def serialize_changeset(self, snapshot):
        """Write serialized FTL files to disk.

        `snapshot` maps paths relative to `localization_dir` to file content
        (str). Content is written UTF-8 encoded. In dry-run mode the target
        paths are only printed.
        """
        for path, content in snapshot.items():
            fullpath = os.path.join(self.localization_dir, path)
            print(f"  Writing to {fullpath}")
            if self.dry_run:
                continue
            fulldir = os.path.dirname(fullpath)
            # An empty dirname means the current directory; makedirs("")
            # would raise. exist_ok avoids the check-then-create race.
            if fulldir:
                os.makedirs(fulldir, exist_ok=True)
            # The with-statement closes the file; no explicit close needed.
            with open(fullpath, "wb") as f:
                f.write(content.encode("utf8"))

    def commit_changeset(self, description_template: str, author: str, index: int):
        """Commit the work tree with a message built from the template.

        The template is formatted with `index` and `author`. Commit failures
        are reported as warnings and do not stop the migration.
        """
        message = description_template.format(index=index, author=author)

        print(f"    Committing changeset: {message}")
        if self.dry_run:
            return
        try:
            self.client.commit(message, author)
        except Exception as err:
            # Best effort by design: keep going with the next changeset.
            print(f"    WARNING: commit failed ({err})")
+
+
def main(
    locale,
    reference_dir: str,
    localization_dir: str,
    migrations: Iterable[ModuleType],
    dry_run: bool,
):
    """Run migrations and commit files with the result.

    The migrator is always closed, including when a migration raises, so
    that a repository client it may have opened is not leaked.
    """
    migrator = Migrator(locale, reference_dir, localization_dir, dry_run)

    try:
        for migration in migrations:
            migrator.run(migration)
    finally:
        migrator.close()
+
+
def cli():
    """Command-line entry point: parse arguments and run the migrations."""
    parser = argparse.ArgumentParser(description="Migrate translations to FTL.")
    parser.add_argument(
        "migrations",
        metavar="MIGRATION",
        type=str,
        nargs="+",
        help="migrations to run (Python modules)",
    )
    parser.add_argument(
        "--locale", "--lang", type=str, help="target locale code (--lang is deprecated)"
    )
    parser.add_argument(
        "--reference-dir", type=str, help="directory with reference FTL files"
    )
    parser.add_argument(
        "--localization-dir", type=str, help="directory for localization files"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="do not write to disk nor commit any changes",
    )
    parser.set_defaults(dry_run=False)

    logger = logging.getLogger("migrate")
    logger.setLevel(logging.INFO)

    args = parser.parse_args()

    # Don't byte-compile migrations.
    # They're not our code, and infrequently run
    with dont_write_bytecode():
        # Import eagerly: a lazy map() would defer the imports until main()
        # iterates it, after this block has already restored bytecode
        # writing, so the guard would have no effect.
        migrations = [importlib.import_module(name) for name in args.migrations]

    main(
        locale=args.locale,
        reference_dir=args.reference_dir,
        localization_dir=args.localization_dir,
        migrations=migrations,
        dry_run=args.dry_run,
    )
+
+
+if __name__ == "__main__":
+ cli()
diff --git a/third_party/python/fluent.migrate/fluent/migrate/transforms.py b/third_party/python/fluent.migrate/fluent/migrate/transforms.py
new file mode 100644
index 0000000000..f45ad1531c
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/transforms.py
@@ -0,0 +1,558 @@
+"""Migration Transforms.
+
+Transforms are AST nodes which describe how legacy translations should be
+migrated. They are created inert and only return the migrated AST nodes when
+they are evaluated by a MigrationContext.
+
+All Transforms evaluate to Fluent Patterns. This makes them suitable for
+defining migrations of values of message, attributes and variants. The special
+CONCAT Transform is capable of joining multiple Patterns returned by evaluating
+other Transforms into a single Pattern. It can also concatenate Pattern
+elements: TextElements and Placeables.
+
+The COPY, REPLACE and PLURALS Transforms inherit from Source which is a special
+AST Node defining the location (the file path and the id) of the legacy
+translation. During the migration, the current MigrationContext scans the
+migration spec for Source nodes and extracts the information about all legacy
+translations being migrated. For instance,
+
+ COPY('file.dtd', 'hello')
+
+is equivalent to:
+
+ FTL.Pattern([
+ Source('file.dtd', 'hello')
+ ])
+
+Sometimes it's useful to work with text rather than (path, key) source
+definitions. This is the case when the migrated translation requires some
+hardcoded text, e.g. <a> and </a> when multiple translations become a single
+one with a DOM overlay. In such cases it's best to use FTL.TextElements:
+
+ FTL.Message(
+ id=FTL.Identifier('update-failed'),
+ value=CONCAT(
+ COPY('aboutDialog.dtd', 'update.failed.start'),
+ FTL.TextElement('<a>'),
+ COPY('aboutDialog.dtd', 'update.failed.linkText'),
+ FTL.TextElement('</a>'),
+ COPY('aboutDialog.dtd', 'update.failed.end'),
+ )
+ )
+
+The REPLACE_IN_TEXT Transform also takes TextElements as input, making it
+possible to pass it as the foreach function of the PLURALS Transform. In the
+example below, each slice of the plural string is converted into a
+TextElement by PLURALS and then run through the REPLACE_IN_TEXT transform.
+
+ FTL.Message(
+ FTL.Identifier('delete-all'),
+ value=PLURALS(
+ 'aboutDownloads.dtd',
+ 'deleteAll',
+ VARIABLE_REFERENCE('num'),
+ lambda text: REPLACE_IN_TEXT(
+ text,
+ {
+ '#1': VARIABLE_REFERENCE('num')
+ }
+ )
+ )
+ )
+"""
+
+import re
+
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Transformer
+from .errors import NotSupportedError
+
+
def chain_elements(elements):
    """Flatten a list of FTL nodes into an iterator over PatternElements.

    Patterns contribute their own elements, PatternElements are yielded
    as they are, and Expressions are wrapped in a Placeable. Anything else
    raises a RuntimeError.
    """
    for node in elements:
        if isinstance(node, FTL.Pattern):
            for child in node.elements:
                yield child
            continue
        if isinstance(node, FTL.PatternElement):
            yield node
            continue
        if isinstance(node, FTL.Expression):
            yield FTL.Placeable(node)
            continue
        raise RuntimeError("Expected Pattern, PatternElement or Expression")
+
+
# Matches text that starts with spaces (groups `whitespace` + `text`) or
# with a newline (group `block_text`).
re_leading_ws = re.compile(
    r"\A(?:(?P<whitespace> +)(?P<text>.*?)|(?P<block_text>\n.*?))\Z",
    re.S,
)
# Matches text that ends with spaces (groups `text` + `whitespace`) or with
# a newline (group `block_text`).
re_trailing_ws = re.compile(
    r"\A(?:(?P<text>.*?)(?P<whitespace> +)|(?P<block_text>.*\n))\Z", re.S
)


def extract_whitespace(regex, element):
    """Extract leading or trailing whitespace from a TextElement.

    Returns a `(Placeable, TextElement)` tuple, whitespace Placeable first.
    The Placeable holds the extracted spaces as a StringLiteral and the
    TextElement holds the text without them. When the element starts or
    ends with a newline instead, the StringLiteral is empty. If the element
    consists solely of the extracted spaces, the TextElement slot is None;
    if `regex` doesn't match at all, `(None, element)` is returned.
    """
    match = re.search(regex, element.value)
    if match is None:
        return None, element

    # No `whitespace` group means the newline alternative matched; it is
    # encoded as an empty { "" }.
    whitespace = match.group("whitespace") or ""
    placeable = FTL.Placeable(FTL.StringLiteral(whitespace))
    if whitespace == element.value:
        return placeable, None

    # Whichever of text or block_text matched holds the rest.
    remainder = match.group("text") or match.group("block_text")
    return placeable, FTL.TextElement(remainder)
+
+
class Transform(FTL.BaseNode):
    """Base class of migration transforms.

    Subclasses implement `__call__(ctx)` to produce the migrated node.
    """

    def __call__(self, ctx):
        raise NotImplementedError

    @staticmethod
    def pattern_of(*elements):
        """Build a normalized `FTL.Pattern` from the given elements.

        Adjacent text is joined and empty text is pruned. Leading and
        trailing whitespace is moved into StringLiteral placeables. An
        empty result is stored as a single `{""}` placeable.
        """
        normalized = []

        # Text content is either an existing TextElement or a StringLiteral
        # placeable holding only spaces. When building from existing
        # Patterns (CONCAT(COPY...)), this may put previously extracted
        # whitespace back into TextElements; the edges of the new Pattern
        # are extracted again below.
        for element in chain_elements(elements):
            if isinstance(element, FTL.TextElement):
                text_content = element.value
            elif (
                isinstance(element, FTL.Placeable)
                and isinstance(element.expression, FTL.StringLiteral)
                and re.match(r"^ *$", element.expression.value)
            ):
                text_content = element.expression.value
            else:
                # Not text to normalize: a number, a reference, or a
                # StringLiteral which is preserved in the Pattern as is.
                normalized.append(element)
                continue

            last = normalized[-1] if len(normalized) else None
            if isinstance(last, FTL.TextElement):
                # Merge into the preceding TextElement.
                last.value += text_content
            elif len(text_content) > 0:
                # Start a new TextElement; empty text is dropped.
                normalized.append(FTL.TextElement(text_content))

        # Store empty values explicitly as {""}.
        if len(normalized) == 0:
            return FTL.Pattern([FTL.Placeable(FTL.StringLiteral(""))])

        # Move explicit leading whitespace into a StringLiteral.
        first = normalized[0]
        if isinstance(first, FTL.TextElement):
            leading_ws, leading_text = extract_whitespace(re_leading_ws, first)
            normalized[:1] = [leading_ws, leading_text]

        # Move explicit trailing whitespace into a StringLiteral.
        final = normalized[-1]
        if isinstance(final, FTL.TextElement):
            trailing_ws, trailing_text = extract_whitespace(re_trailing_ws, final)
            normalized[-1:] = [trailing_text, trailing_ws]

        # The extraction steps leave None in slots that had nothing.
        return FTL.Pattern([item for item in normalized if item is not None])
+
+
class Source(Transform):
    """Base class for Transforms that read translations from source files.

    By contract, the first argument is the source path and the second is
    a key: a legacy string ID, or a Fluent `id.attr`.
    """

    def __init__(self, path, key):
        self.path, self.key = path, key
+
+
class FluentSource(Source):
    """Declare a Fluent source translation to be copied over.

    When evaluated, it clones the Pattern of the parsed source.

    Raises `NotSupportedError` if `path` is not an `.ftl` file, or if `key`
    refers to an attribute of a Term.
    """

    def __init__(self, path, key):
        if not path.endswith(".ftl"):
            raise NotSupportedError(
                "Please use COPY to migrate from legacy files " "({})".format(path)
            )
        # Term keys start with "-"; a "." denotes an attribute access.
        if key[0] == "-" and "." in key:
            # The trailing space after "are" matters: the adjacent literals
            # are concatenated and previously read "arelocale-dependent".
            raise NotSupportedError(
                "Cannot migrate from Term Attributes, as they are "
                "locale-dependent ({})".format(path)
            )
        super().__init__(path, key)

    def __call__(self, ctx):
        pattern = ctx.get_fluent_source_pattern(self.path, self.key)
        # Clone so that later modifications don't touch the parsed source.
        return pattern.clone()
+
+
class COPY_PATTERN(FluentSource):
    """Create a Pattern with the translation value from the given source.

    The key may be a Message ID, a Message `ID.attribute_name`, or a Term
    ID. Term attributes are not supported, as they're internal to the
    localization.
    """
+
+
class TransformPattern(FluentSource, Transformer):
    """Base class for modifying a Fluent pattern as part of a migration.

    Subclasses implement `visit_*` methods of the Transformer pattern to
    do the actual modifications.
    """

    def __call__(self, ctx):
        # Get the cloned source pattern, then run the visitor over it.
        return self.visit(super().__call__(ctx))

    def visit_Pattern(self, node):
        # Rebuild through pattern_of so that restructuring transforms
        # still produce valid Patterns.
        visited = self.generic_visit(node)
        return Transform.pattern_of(*visited.elements)

    def visit_Placeable(self, node):
        # A transform may have replaced the expression with a Pattern or
        # PatternElement. In that case pass it through without the
        # Placeable wrapper; visit_Pattern flattens Patterns later.
        visited = self.generic_visit(node)
        expression = visited.expression
        if isinstance(expression, (FTL.Pattern, FTL.PatternElement)):
            return expression
        return visited
+
+
class LegacySource(Source):
    """Declare the source translation to be migrated with other transforms.

    When evaluated, it returns a TextElement with the content of the
    source translation. Escaped characters are unescaped by the
    compare-locales parser according to the file format:

      - in properties files: \\uXXXX,
      - in DTD files: known named, decimal, and hexadecimal HTML entities.

    Consult the following files for the list of known named HTML entities:

    https://github.com/python/cpython/blob/2.7/Lib/htmlentitydefs.py
    https://github.com/python/cpython/blob/3.6/Lib/html/entities.py

    Unless `trim=False` is set, leading and trailing whitespace on each
    line, as well as leading and trailing empty lines, are stripped from
    the content.
    """

    def __init__(self, path, key, trim=None):
        if path.endswith(".ftl"):
            raise NotSupportedError(
                "Please use COPY_PATTERN to migrate from Fluent files "
                "({})".format(path)
            )

        super().__init__(path, key)
        # None means "not set explicitly"; it is treated like True here.
        self.trim = trim

    def get_text(self, ctx):
        """Fetch the legacy translation text for this path and key."""
        return ctx.get_legacy_source(self.path, self.key)

    @staticmethod
    def trim_text(text):
        """Strip per-line edge whitespace and surrounding empty lines."""
        # Leading spaces/tabs of every line.
        without_leading = re.sub("^[ \t]+", "", text, flags=re.M)
        # Trailing spaces/tabs of every line.
        without_edges = re.sub("[ \t]+$", "", without_leading, flags=re.M)
        # Leading and trailing empty lines.
        return without_edges.strip("\r\n")

    def __call__(self, ctx):
        text = self.get_text(ctx)
        if self.trim is False:
            return FTL.TextElement(text)
        return FTL.TextElement(self.trim_text(text))
+
+
class COPY(LegacySource):
    """Create a Pattern with the translation value from the given source."""

    def __call__(self, ctx):
        # Wrap the TextElement from LegacySource in a normalized Pattern.
        return Transform.pattern_of(super().__call__(ctx))
+
+
# Matches "%%" or a printf placeholder with an optional argument number,
# width, precision, and a conversion specifier.
PRINTF = re.compile(
    r"%(?P<good>%|"
    r"(?:(?P<number>[1-9][0-9]*)\$)?"
    r"(?P<width>\*|[0-9]+)?"
    r"(?P<prec>\.(?:\*|[0-9]+)?)?"
    r"(?P<spec>[duxXosScpfg]))"
)


def number():
    """Yield 1, 2, 3, ... without end."""
    current = 0
    while True:
        current += 1
        yield current


def normalize_printf(text):
    """Normalize printf arguments so that they're all numbered.

    Gecko forbids mixing unnumbered and numbered arguments, so only the
    unnumbered ones need converting. Arguments with a width of zero are
    removed, as localizers use them to hide an argument from the output;
    a hidden unnumbered argument still uses up its position.
    """
    positions = number()

    def normalized(match):
        # An escaped percent sign collapses to a single "%".
        if match.group("good") == "%":
            return "%"

        already_numbered = bool(match.group("number"))
        if not already_numbered:
            # Consume a position even if the argument turns out hidden.
            position = next(positions)

        if match.group("width") == "0":
            return ""
        if already_numbered:
            return match.group()
        return "%{}${}".format(position, match.group("spec"))

    return PRINTF.sub(normalized, text)
+
+
class REPLACE_IN_TEXT(Transform):
    """Create a Pattern from a TextElement and replace legacy placeables.

    The keys of the `replacements` dict are the original placeables. Each
    value must be a FTL Pattern, Placeable, TextElement or Expression to
    interpolate in place of its key.
    """

    def __init__(self, element, replacements, normalize_printf=False):
        self.element = element
        self.replacements = replacements
        self.normalize_printf = normalize_printf

    def __call__(self, ctx):
        value = self.element.value
        if self.normalize_printf:
            value = normalize_printf(value)

        # Map every position at which an original placeable occurs to that
        # placeable. A placeable missing from the text adds no entries.
        keys_by_index = {}
        for key in self.replacements:
            for occurrence in re.finditer(re.escape(key), value):
                keys_by_index[occurrence.start()] = key

        # PatternElements built from the legacy translation and the FTL
        # replacements. May contain empty or adjacent TextElements, which
        # pattern_of cleans up.
        elements = []
        remaining = value

        # Go through the placeables in order of appearance. Split the
        # remaining text around each one: the text before it becomes a
        # TextElement, the placeable becomes its evaluated replacement.
        for _, key in sorted(keys_by_index.items(), key=lambda item: item[0]):
            replacement = ctx.evaluate(self.replacements[key])
            before, _, remaining = remaining.partition(key)
            elements.append(FTL.TextElement(before))
            elements.append(replacement)

        # Whatever follows the last placeable.
        elements.append(FTL.TextElement(remaining))
        return Transform.pattern_of(*elements)
+
+
class REPLACE(LegacySource):
    """Create a Pattern with interpolations from given source.

    Interpolations in the source translation value are replaced with FTL
    placeables by the `REPLACE_IN_TEXT` transform.
    """

    def __init__(self, path, key, replacements, **kwargs):
        # normalize_printf defaults to True for .properties files only;
        # an explicit keyword argument from the caller always wins.
        if "normalize_printf" in kwargs:
            normalize_printf = kwargs.pop("normalize_printf")
        else:
            normalize_printf = path.endswith(".properties")

        super().__init__(path, key, **kwargs)
        self.replacements = replacements
        self.normalize_printf = normalize_printf

    def __call__(self, ctx):
        source_element = super().__call__(ctx)
        replace = REPLACE_IN_TEXT(
            source_element, self.replacements, normalize_printf=self.normalize_printf
        )
        return replace(ctx)
+
+
class PLURALS(LegacySource):
    """Create a Pattern with plurals from given source.

    Builds an `FTL.SelectExpression` from the supplied `selector` and the
    variants found in the source. The source translation is expected to be
    a semicolon-separated list of plural forms. Each form becomes a
    TextElement which is passed to `foreach`; `foreach` should return an
    `FTL.Node` or a `Transform`. The default `foreach` creates a valid
    Pattern from the TextElement.
    """

    DEFAULT_ORDER = ("zero", "one", "two", "few", "many", "other")

    def __init__(self, path, key, selector, foreach=Transform.pattern_of, **kwargs):
        super().__init__(path, key, **kwargs)
        self.selector = selector
        self.foreach = foreach

    def __call__(self, ctx):
        source_element = super().__call__(ctx)
        selector = ctx.evaluate(self.selector)
        categories = ctx.plural_categories
        forms = [
            FTL.TextElement(part.strip()) for part in source_element.value.split(";")
        ]

        # The default variant is the locale's category that comes last in
        # DEFAULT_ORDER: usually `other`, in some cases `many`.
        default_key = [
            category
            for category in reversed(self.DEFAULT_ORDER)
            if category in categories
        ][0]

        # Pair categories with legacy forms positionally (the order used
        # by Gecko's PluralForm.jsm), leaving out empty forms.
        pairs = [
            (category, form) for category, form in zip(categories, forms) if form.value
        ]

        # The legacy translation defines no plural forms at all.
        if not pairs:
            return Transform.pattern_of()

        # A single category or a single legacy variant needs no
        # SelectExpression.
        if len(pairs) == 1:
            only_form = pairs[0][1]
            return Transform.pattern_of(ctx.evaluate(self.foreach(only_form)))

        # If the default category has no form, reuse the form of the last
        # category (in CLDR order) found in the legacy translation.
        pairs.sort(key=lambda pair: self.DEFAULT_ORDER.index(pair[0]))
        last_category, last_form = pairs[-1]
        if last_category != default_key:
            pairs.append((default_key, last_form))

        variants = []
        for category, form in pairs:
            # `foreach` describes the transformation for this form;
            # evaluating it yields the migrated FTL node.
            variant_value = ctx.evaluate(self.foreach(form))
            variants.append(
                FTL.Variant(
                    key=FTL.Identifier(category),
                    value=variant_value,
                    default=category == default_key,
                )
            )

        select = FTL.SelectExpression(selector=selector, variants=variants)
        return Transform.pattern_of(select)
+
+
class CONCAT(Transform):
    """Create a new Pattern from Patterns, PatternElements and Expressions.

    With two or more elements, `CONCAT` turns off trimming for every
    element that is a `LegacySource` whose `trim` was not set explicitly.
    These two calls are therefore equivalent:

        CONCAT(
            FTL.TextElement("Hello"),
            COPY("file.properties", "hello")
        )

        CONCAT(
            FTL.TextElement("Hello"),
            COPY("file.properties", "hello", trim=False)
        )

    Pass `trim=True` explicitly to force trimming:

        CONCAT(
            FTL.TextElement("Hello "),
            COPY("file.properties", "hello", trim=True)
        )

    With a single element the trimming behavior is left alone, so these
    two transforms are equivalent:

        CONCAT(COPY("file.properties", "hello"))

        COPY("file.properties", "hello")
    """

    def __init__(self, *elements, **kwargs):
        # Elements arrive either positionally (migration specs) or as
        # elements=[...]; the latter is how FTL.BaseNode.traverse recreates
        # a traversed node from its attributes.
        self.elements = list(kwargs.get("elements", elements))

        # CONCAT(COPY()) must stay equivalent to COPY(), so that wrapping
        # in CONCAT is always a safe no-op; transforms_from relies on it.
        if len(self.elements) <= 1:
            return

        for element in self.elements:
            # Leave an explicitly set trim alone.
            if isinstance(element, LegacySource) and element.trim is None:
                element.trim = False

    def __call__(self, ctx):
        return Transform.pattern_of(*self.elements)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/util.py b/third_party/python/fluent.migrate/fluent/migrate/util.py
new file mode 100644
index 0000000000..43d9e62c19
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/util.py
@@ -0,0 +1,107 @@
+import textwrap
+
+import fluent.syntax.ast as FTL
+from fluent.syntax.parser import FluentParser, FluentParserStream
+
+
# Module-wide Fluent parser, created once with span tracking turned off and
# reused by the parsing helpers in this module.
fluent_parser = FluentParser(with_spans=False)
+
+
def parse(Parser, string):
    """Parse `string` with the parser selected by `Parser`.

    When `Parser` is `FluentParser`, the shared module-level parser is used
    and its parse result is returned. Any other `Parser` is treated as a
    legacy-format parser class: it is instantiated, fed the UTF-8 encoded
    bytes of `string`, and the entities it yields are returned as a dict
    keyed by each entity's `key`.
    """
    if Parser is not FluentParser:
        # Legacy resource: load the contents into a fresh parser instance.
        legacy_parser = Parser()
        legacy_parser.readContents(string.encode("utf8"))
        # The parser is iterable; collect its entities by key.
        entities = {}
        for entity in legacy_parser:
            entities[entity.key] = entity
        return entities

    return fluent_parser.parse(string)
+
+
def ftl_resource_to_ast(code):
    """Clean up `code` with `ftl` and parse it with the shared parser."""
    source = ftl(code)
    return fluent_parser.parse(source)
+
+
def ftl_resource_to_json(code):
    """Parse `code` (after `ftl` clean-up) and return the result's JSON form."""
    resource = fluent_parser.parse(ftl(code))
    return resource.to_json()
+
+
def ftl_pattern_to_json(code):
    """Read a pattern from `code` (after `ftl` clean-up) and return its JSON form."""
    stream = FluentParserStream(ftl(code))
    pattern = fluent_parser.maybe_get_pattern(stream)
    return pattern.to_json()
+
+
def to_json(merged_iter):
    """Map each path in `merged_iter` to the JSON form of its resource.

    `merged_iter` yields `(path, resource)` pairs; every resource must
    provide a `to_json()` method.
    """
    serialized = {}
    for path, resource in merged_iter:
        serialized[path] = resource.to_json()
    return serialized
+
+
# AST entry classes treated as localizable by the helpers below.
LOCALIZABLE_ENTRIES = (
    FTL.Message,
    FTL.Term,
)
+
+
def get_message(body, ident):
    """Return the first message or term in `body` whose id is `ident`.

    Entries that are not localizable (see `LOCALIZABLE_ENTRIES`) are
    skipped. Returns None when nothing matches.
    """
    candidates = (
        entry
        for entry in body
        if isinstance(entry, LOCALIZABLE_ENTRIES) and entry.id.name == ident
    )
    return next(candidates, None)
+
+
def get_transform(body, ident):
    """Return the first item of `body` whose `id.name` equals `ident`.

    Returns None when no item matches.
    """
    return next((item for item in body if item.id.name == ident), None)
+
+
def skeleton(node):
    """Return a skeleton copy of `node`.

    A localizable entry is rebuilt as a new instance of the same class with
    a cloned id and `value=None`; that is not a valid Fluent entry yet and
    needs a value and/or attributes set afterwards. Any other node is
    returned as a plain clone.
    """
    if not isinstance(node, LOCALIZABLE_ENTRIES):
        return node.clone()

    entry_class = type(node)
    return entry_class(id=node.id.clone(), value=None)
+
+
def ftl(code):
    """Normalize the indentation of an FTL snippet.

    Leading newlines are dropped (the snippet may come from a triple-quoted
    literal) and the common leading whitespace is removed, so the result can
    be compared against the output of the FTL serializer.
    """
    without_leading_newlines = code.lstrip("\n")
    return textwrap.dedent(without_leading_newlines)
+
+
def fold(fun, node, init):
    """Reduce `node` to a single value using `fun`.

    Every attribute value of `node` is visited in postorder: nested
    `FTL.BaseNode` instances, lists and dict values are folded first, then
    `fun(acc, value)` is applied to the value itself.

    Args:
        fun: callable taking `(accumulator, value)` and returning the new
            accumulator.
        node: object whose attributes (`vars(node)`) are traversed.
        init: initial accumulator value.

    Returns:
        The accumulator after every value has been visited.
    """

    def fold_values(vals, acc):
        # Iterate over siblings instead of recursing once per sibling:
        # recursion is only used to descend, so the stack depth follows the
        # nesting depth of the tree rather than the length of its lists.
        for val in vals:
            if isinstance(val, FTL.BaseNode):
                acc = fold(fun, val, acc)
            if isinstance(val, list):
                acc = fold_values(val, acc)
            if isinstance(val, dict):
                acc = fold_values(val.values(), acc)
            acc = fun(acc, val)
        return acc

    return fold_values(vars(node).values(), init)
diff --git a/third_party/python/fluent.migrate/fluent/migrate/validator.py b/third_party/python/fluent.migrate/fluent/migrate/validator.py
new file mode 100644
index 0000000000..4e05865434
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/validator.py
@@ -0,0 +1,323 @@
+import argparse
+import ast
+from itertools import zip_longest
+
+from fluent.migrate import transforms
+from fluent.migrate.errors import MigrationError
+from fluent.migrate.helpers import transforms_from
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Visitor
+from compare_locales import mozpath
+
+
class MigrateNotFoundException(Exception):
    """The recipe's `migrate` function is missing, duplicated or malformed."""
+
+
class BadContextAPIException(Exception):
    """The recipe uses the migration context in an unsupported way."""
+
+
def process_assign(node, context):
    """Record the value of a simple assignment in `context`.

    The right-hand side is resolved as follows: a constant yields its
    value, a name yields whatever `context` already holds for it, and a
    call yields the call node itself. Anything else, or a value that
    resolves to None, is ignored. The resolved value is stored under every
    target that is a plain name.
    """
    rhs = node.value
    if isinstance(rhs, ast.Constant):
        resolved = rhs.value
    elif isinstance(rhs, ast.Name):
        resolved = context.get(rhs.id)
    elif isinstance(rhs, ast.Call):
        resolved = rhs
    else:
        return

    if resolved is None:
        return

    context.update(
        (target.id, resolved)
        for target in node.targets
        if isinstance(target, ast.Name)
    )
+
+
class Validator:
    """Validate a migration recipe.

    Extract information from the migration recipe about which files to
    migrate from, and which files to migrate to.
    Also check for errors in the recipe, or bad API usage.
    """

    @classmethod
    def validate(cls, path, code=None):
        """Validate the recipe at `path` and return the inspection result.

        Args:
            path: file name of the recipe; also used as the file name when
                parsing.
            code: recipe source. When None, the source is read from `path`.

        Returns:
            The dict produced by `inspect()`.
        """
        if code is None:
            # Python source files are UTF-8 unless declared otherwise, so
            # do not depend on the locale's default encoding here.
            with open(path, encoding="utf-8") as fh:
                code = fh.read()
        return cls(code, path).inspect()

    def __init__(self, code, path):
        self.ast = ast.parse(code, path)

    def inspect(self):
        """Walk the top level of the recipe and analyze `migrate`.

        Top-level assignments and imports seen before `migrate` are
        collected and handed to the analysis of the function.

        Returns:
            The dict returned by `inspect_migrate()`.

        Raises:
            MigrateNotFoundException: if `migrate` is missing or defined
                more than once.
        """
        migrate_func = None
        global_assigns = {}
        for top_level in ast.iter_child_nodes(self.ast):
            if isinstance(top_level, ast.FunctionDef) and top_level.name == "migrate":
                if migrate_func:
                    raise MigrateNotFoundException("Duplicate definition of migrate")
                migrate_func = top_level
                details = self.inspect_migrate(migrate_func, global_assigns)
            if isinstance(top_level, ast.Assign):
                process_assign(top_level, global_assigns)
            if isinstance(top_level, (ast.Import, ast.ImportFrom)):
                # Only `from X import ...` nodes carry a module.
                module = getattr(top_level, "module", None)
                for alias in top_level.names:
                    dotted = f"{module}.{alias.name}" if module else alias.name
                    global_assigns[alias.asname or alias.name] = dotted
        if not migrate_func:
            raise MigrateNotFoundException("migrate function not found")
        return details

    def inspect_migrate(self, migrate_func, global_assigns):
        """Analyze the body of the `migrate` function.

        Args:
            migrate_func: the `ast.FunctionDef` node of `migrate`.
            global_assigns: names known from the module's top level.

        Returns:
            A dict with the `references` and `issues` gathered by
            `MigrateAnalyzer`.

        Raises:
            MigrateNotFoundException: if `migrate` does not take exactly
                one plain positional argument.
        """
        arguments = migrate_func.args
        # Any other populated field means defaults, *args, **kwargs,
        # keyword-only or positional-only parameters are present.
        has_extras = any(
            getattr(arguments, arg_field)
            for arg_field in arguments._fields
            if arg_field != "args"
        )
        if len(arguments.args) != 1 or has_extras:
            raise MigrateNotFoundException("migrate takes only one positional argument")
        ctx_var = arguments.args[0].arg
        visitor = MigrateAnalyzer(ctx_var, global_assigns)
        visitor.visit(migrate_func)
        return {
            "references": visitor.references,
            "issues": visitor.issues,
        }
+
+
def full_name(node, global_assigns):
    """Return the dotted name spelled by an attribute chain.

    Attribute accesses are unwound from the outside in. When the innermost
    expression is a plain name, it is replaced by its entry in
    `global_assigns` if that entry is a string (for example the dotted
    path recorded for an import); otherwise the name itself is used.

    Args:
        node: the expression node, typically the `func` of a call.
        global_assigns: mapping of known names to their recorded values.

    Returns:
        The dotted name as a string; empty if `node` is neither an
        attribute chain nor a name.
    """
    leafs = []
    while isinstance(node, ast.Attribute):
        leafs.append(node.attr)
        node = node.value
    if isinstance(node, ast.Name):
        resolved = global_assigns.get(node.id, node.id)
        # Recorded values may be non-strings (numbers, call nodes); those
        # cannot be part of a dotted path and would break the join below.
        leafs.append(resolved if isinstance(resolved, str) else node.id)
    return ".".join(reversed(leafs))
+
+
# Types accepted for a resolved path value: a string, or a call node.
PATH_TYPES = (str, ast.Call)
+
+
class MigrateAnalyzer(ast.NodeVisitor):
    """Inspect the body of a recipe's `migrate` function.

    Collects the reference paths passed to `<ctx>.add_transforms` in
    `references` and problems found along the way in `issues` (dicts with
    `msg` and `line`). Unsupported use of the context variable raises
    `BadContextAPIException`.
    """

    def __init__(self, ctx_var, global_assigns):
        super().__init__()
        self.ctx_var = ctx_var
        self.global_assigns = global_assigns
        self.depth = 0
        self.issues = []
        self.references = set()

    def generic_visit(self, node):
        # Track nesting so visit_Assign can tell direct statements of the
        # function body (depth 1) from nested ones.
        self.depth += 1
        super().generic_visit(node)
        self.depth -= 1

    def visit_Assign(self, node):
        if self.depth == 1:
            process_assign(node, self.global_assigns)
        self.generic_visit(node)

    def visit_Attribute(self, node):
        """Reject attribute access on the context other than the allowed ones."""
        if isinstance(node.value, ast.Name) and node.value.id == self.ctx_var:
            if node.attr not in ("add_transforms", "locale"):
                raise BadContextAPIException(
                    f"Unexpected attribute access on {self.ctx_var}.{node.attr}"
                )
        self.generic_visit(node)

    def visit_Call(self, node):
        """Dispatch calls on the context and calls into fluent.migrate."""
        func = node.func
        if (
            isinstance(func, ast.Attribute)
            and isinstance(func.value, ast.Name)
            and func.value.id == self.ctx_var
        ):
            return self.call_ctx(node)
        dotted = full_name(func, self.global_assigns)
        if dotted == "fluent.migrate.helpers.transforms_from":
            return self.call_helpers_transforms_from(node)
        if dotted.startswith("fluent.migrate."):
            return self.call_transform(node, dotted)
        self.generic_visit(node)

    def call_ctx(self, node):
        """Handle a method call on the context; only add_transforms is allowed."""
        if node.func.attr == "add_transforms":
            return self.call_add_transforms(node)
        raise BadContextAPIException(
            f"Unexpected call on {self.ctx_var}.{node.func.attr}"
        )

    def call_add_transforms(self, node):
        """Check an add_transforms call and record its reference path."""
        args_msg = (
            "Expected arguments to {}.add_transforms: "
            "target_ftl_path, reference_ftl_path, list_of_transforms"
        ).format(self.ctx_var)
        ref_msg = (
            "Expected second argument to {}.add_transforms: "
            "reference should be string or variable with string value"
        ).format(self.ctx_var)
        # Just check call signature here, check actual types below
        if not self.check_arguments(node, (ast.AST, ast.AST, ast.AST)):
            self.issues.append({"msg": args_msg, "line": node.lineno})
            return
        in_reference = node.args[1]
        if isinstance(in_reference, ast.Name):
            in_reference = self.global_assigns.get(in_reference.id)
        if isinstance(in_reference, ast.Constant):
            in_reference = in_reference.value
        if not isinstance(in_reference, str):
            self.issues.append({"msg": ref_msg, "line": node.args[1].lineno})
            return
        self.references.add(in_reference)
        # Checked node.args[1].
        # There's not a lot we can say about our target path,
        # ignoring that.
        # For our transforms, we want more checks.
        self.generic_visit(node.args[2])

    def call_transform(self, node, dotted):
        """Check the path and key arguments of a Source transform call."""
        module, called = dotted.rsplit(".", 1)
        if module not in ("fluent.migrate", "fluent.migrate.transforms"):
            return
        # Unknown names and non-class attributes are not Source transforms;
        # skip them instead of failing in getattr()/issubclass().
        transform = getattr(transforms, called, None)
        if not (isinstance(transform, type) and issubclass(transform, transforms.Source)):
            return
        bad_args = f"{called} takes path and key as first two params"
        if not self.check_arguments(
            node,
            (
                (ast.Constant, ast.Name),
                (ast.Constant, ast.Name),
            ),
            allow_more=True,
            check_kwargs=False,
        ):
            self.issues.append({"msg": bad_args, "line": node.lineno})
            return
        path = node.args[0]
        if isinstance(path, ast.Constant):
            path = path.value
        if isinstance(path, ast.Name):
            path = self.global_assigns.get(path.id)
        if not isinstance(path, PATH_TYPES):
            self.issues.append({"msg": bad_args, "line": node.lineno})

    def call_helpers_transforms_from(self, node):
        """Check a transforms_from call and inspect the transforms it yields."""
        args_msg = "Expected arguments to transforms_from: " "str, **substitions"
        # The single positional argument must be a string literal; other
        # constants are reported rather than handed to transforms_from.
        if not (
            self.check_arguments(node, (ast.Constant,), check_kwargs=False)
            and isinstance(node.args[0].value, str)
        ):
            self.issues.append({"msg": args_msg, "line": node.lineno})
            return
        kwargs = {}
        found_bad_keywords = False
        for keyword in node.keywords:
            v = keyword.value
            if isinstance(v, ast.Constant):
                v = v.value
            if isinstance(v, ast.Name):
                v = self.global_assigns.get(v.id)
            if isinstance(v, ast.Call):
                v = "determined at runtime"
            if not isinstance(v, PATH_TYPES):
                msg = "Bad keyword arg {} to transforms_from".format(keyword.arg)
                self.issues.append({"msg": msg, "line": node.lineno})
                found_bad_keywords = True
            else:
                kwargs[keyword.arg] = v
        if found_bad_keywords:
            return
        try:
            # Named so that it does not shadow the `transforms` module.
            built_transforms = transforms_from(node.args[0].value, **kwargs)
        except MigrationError as e:
            self.issues.append({"msg": str(e), "line": node.lineno})
            return
        ti = TransformsInspector()
        ti.visit(built_transforms)
        self.issues.extend(
            {"msg": issue, "line": node.lineno} for issue in set(ti.issues)
        )

    def check_arguments(self, node, argspec, check_kwargs=True, allow_more=False):
        """Check the positional arguments of a call against `argspec`.

        Args:
            node: the call node to check.
            argspec: sequence of node types (or tuples of types), one per
                expected positional argument.
            check_kwargs: when true, any keyword argument fails the check.
            allow_more: when true, arguments beyond `argspec` are accepted.

        Returns:
            True if the call matches, False otherwise.
        """
        if check_kwargs and (node.keywords or getattr(node, "kwargs", None)):
            return False
        if getattr(node, "starargs", None):
            return False
        for arg, node_type in zip_longest(node.args, argspec):
            if node_type is None:
                # More arguments than the spec describes.
                return bool(allow_more)
            # A missing argument shows up as None and fails the type check.
            if not isinstance(arg, node_type):
                return False
        return True
+
+
class TransformsInspector(Visitor):
    """Collect issues about Source transforms found while visiting nodes."""

    def __init__(self):
        super().__init__()
        self.issues = []

    def generic_visit(self, node):
        if isinstance(node, transforms.Source):
            source_path = node.path
            # Source needs paths to be normalized
            # https://bugzilla.mozilla.org/show_bug.cgi?id=1568199
            normalized = mozpath.normpath(source_path)
            if source_path != normalized:
                self.issues.append(
                    f'Source "{source_path}" needs to be a normalized path'
                )
        super().generic_visit(node)
+
+
def cli():
    """Validate the recipe named on the command line and print its issues.

    Returns 1 when at least one issue was reported, otherwise 0.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("migration")
    options = arg_parser.parse_args()

    report = Validator.validate(options.migration)
    found = report["issues"]
    for entry in found:
        print(entry["msg"], "at line", entry["line"])

    if found:
        return 1
    return 0