1 files changed, 558 insertions, 0 deletions
diff --git a/third_party/python/fluent.migrate/fluent/migrate/transforms.py b/third_party/python/fluent.migrate/fluent/migrate/transforms.py
new file mode 100644
index 0000000000..f45ad1531c
--- /dev/null
+++ b/third_party/python/fluent.migrate/fluent/migrate/transforms.py
@@ -0,0 +1,558 @@
+"""Migration Transforms.
+
+Transforms are AST nodes which describe how legacy translations should be
+migrated.  They are created inert and only return the migrated AST nodes when
+they are evaluated by a MigrationContext.
+
+All Transforms evaluate to Fluent Patterns. This makes them suitable for
+defining migrations of values of messages, attributes and variants.  The special
+CONCAT Transform is capable of joining multiple Patterns returned by evaluating
+other Transforms into a single Pattern.  It can also concatenate Pattern
+elements: TextElements and Placeables.
+
+The COPY, REPLACE and PLURALS Transforms inherit from Source which is a special
+AST Node defining the location (the file path and the id) of the legacy
+translation.  During the migration, the current MigrationContext scans the
+migration spec for Source nodes and extracts the information about all legacy
+translations being migrated. For instance,
+
+    COPY('file.dtd', 'hello')
+
+is equivalent to:
+
+    FTL.Pattern([
+        Source('file.dtd', 'hello')
+    ])
+
+Sometimes it's useful to work with text rather than (path, key) source
+definitions. This is the case when the migrated translation requires some
+hardcoded text, e.g. <a> and </a> when multiple translations become a single
+one with a DOM overlay. In such cases it's best to use FTL.TextElements:
+
+    FTL.Message(
+        id=FTL.Identifier('update-failed'),
+        value=CONCAT(
+            COPY('aboutDialog.dtd', 'update.failed.start'),
+            FTL.TextElement('<a>'),
+            COPY('aboutDialog.dtd', 'update.failed.linkText'),
+            FTL.TextElement('</a>'),
+            COPY('aboutDialog.dtd', 'update.failed.end'),
+        )
+    )
+
+The REPLACE_IN_TEXT Transform also takes TextElements as input, making it
+possible to pass it as the foreach function of the PLURALS Transform. In the
+example below, each slice of the plural string is converted into a
+TextElement by PLURALS and then run through the REPLACE_IN_TEXT transform.
+
+    FTL.Message(
+        FTL.Identifier('delete-all'),
+        value=PLURALS(
+            'aboutDownloads.dtd',
+            'deleteAll',
+            VARIABLE_REFERENCE('num'),
+            lambda text: REPLACE_IN_TEXT(
+                text,
+                {
+                    '#1': VARIABLE_REFERENCE('num')
+                }
+            )
+        )
+    )
+"""
+
+import re
+
+from fluent.syntax import ast as FTL
+from fluent.syntax.visitor import Transformer
+from .errors import NotSupportedError
+
+
+def chain_elements(elements):
+    """Yield PatternElements from a mix of Patterns, elements and Expressions."""
+    for node in elements:
+        if isinstance(node, FTL.Pattern):
+            for child in node.elements:
+                yield child
+            continue
+        if not isinstance(node, (FTL.PatternElement, FTL.Expression)):
+            raise RuntimeError("Expected Pattern, PatternElement or Expression")
+        # PatternElements pass through; bare Expressions get a Placeable.
+        is_element = isinstance(node, FTL.PatternElement)
+        yield node if is_element else FTL.Placeable(node)
+
+
+# Both patterns must match the whole value; DOTALL lets `.` span newlines.
+re_leading_ws = re.compile(
+    r"\A(?:(?P<whitespace> +)(?P<text>.*?)|(?P<block_text>\n.*?))\Z", re.DOTALL
+)
+re_trailing_ws = re.compile(
+    r"\A(?:(?P<text>.*?)(?P<whitespace> +)|(?P<block_text>.*\n))\Z", re.DOTALL
+)
+
+
+def extract_whitespace(regex, element):
+    """Split leading or trailing whitespace off a TextElement.
+
+    Return a `(Placeable, TextElement)` tuple, always in that order. The
+    Placeable wraps a StringLiteral holding the extracted spaces and the
+    TextElement holds the remaining text. If the text starts or ends with
+    a newline, the StringLiteral is empty and the text is kept whole. The
+    Placeable is None when `regex` does not match; the TextElement is None
+    when the value consists of spaces only.
+    """
+    match = re.search(regex, element.value)
+    if match is None:
+        return None, element
+
+    # The `whitespace` group is unset for the newline (block text) branch,
+    # which is encoded as an empty { "" }.
+    spaces = match.group("whitespace") or ""
+    literal = FTL.Placeable(FTL.StringLiteral(spaces))
+    if spaces == element.value:
+        return literal, None
+
+    # Whichever of `text` and `block_text` took part in the match.
+    rest = match.group("text") or match.group("block_text")
+    return literal, FTL.TextElement(rest)
+
+
+class Transform(FTL.BaseNode):
+    """Base class of all migration transforms; subclasses implement __call__."""
+
+    def __call__(self, ctx):
+        raise NotImplementedError
+
+    @staticmethod
+    def pattern_of(*elements):
+        """Build a normalized FTL.Pattern from `elements`.
+
+        Accepts Patterns, PatternElements and Expressions (see chain_elements).
+        """
+        normalized = []
+
+        # Normalize text content: convert text content to TextElements, join
+        # adjacent text and prune empty. Text content is either existing
+        # TextElements or StringLiterals made of spaces only. This may put
+        # leading and trailing whitespace back into TextElements if the new
+        # Pattern is built from existing Patterns (CONCAT(COPY...)); it is
+        # extracted again into StringLiterals below.
+        for element in chain_elements(elements):
+            if isinstance(element, FTL.TextElement):
+                text_content = element.value
+            elif (
+                isinstance(element, FTL.Placeable)
+                and isinstance(element.expression, FTL.StringLiteral)
+                # fullmatch, because `$` would also accept a trailing newline.
+                and re.fullmatch(r" *", element.expression.value)
+            ):
+                text_content = element.expression.value
+            else:
+                # Numbers, references and other StringLiterals are kept as is.
+                normalized.append(element)
+                continue
+
+            previous = normalized[-1] if normalized else None
+            if isinstance(previous, FTL.TextElement):
+                # Join adjacent text; `previous` was created by this loop.
+                previous.value += text_content
+            elif text_content:
+                # Copy non-empty text into a new TextElement; prune empty.
+                normalized.append(FTL.TextElement(text_content))
+
+        # Store empty values explicitly as {""}.
+        if not normalized:
+            return FTL.Pattern([FTL.Placeable(FTL.StringLiteral(""))])
+
+        # Extract explicit leading whitespace into a StringLiteral.
+        if isinstance(normalized[0], FTL.TextElement):
+            normalized[:1] = extract_whitespace(re_leading_ws, normalized[0])
+
+        # Extract explicit trailing whitespace into a StringLiteral.
+        if isinstance(normalized[-1], FTL.TextElement):
+            ws, text = extract_whitespace(re_trailing_ws, normalized[-1])
+            normalized[-1:] = [text, ws]
+
+        # Either extraction may have left None placeholders; drop them.
+        return FTL.Pattern([node for node in normalized if node is not None])
+
+
+class Source(Transform):
+    """Common base of the Transforms which read a translation from a file.
+
+    Subclasses take the path of the source file as their first argument and
+    the key as their second: a legacy string ID, or a Fluent id.attr.
+    """
+
+    def __init__(self, path, key):
+        # Where the source translation lives: the file and the entry in it.
+        self.path, self.key = path, key
+
+
+class FluentSource(Source):
+    """Declare a Fluent source translation to be copied over.
+
+    When evaluated, it clones the Pattern of the parsed source.
+    """
+
+    def __init__(self, path, key):
+        """Raise NotSupportedError for non-.ftl paths and Term attribute keys."""
+        if not path.endswith(".ftl"):
+            raise NotSupportedError(
+                "Please use COPY to migrate from legacy files ({})".format(path)
+            )
+        if key.startswith("-") and "." in key:
+            raise NotSupportedError(
+                "Cannot migrate from Term Attributes, as they are "
+                "locale-dependent ({})".format(path)
+            )
+        super().__init__(path, key)
+
+    def __call__(self, ctx):
+        return ctx.get_fluent_source_pattern(self.path, self.key).clone()
+
+
+class COPY_PATTERN(FluentSource):
+    """Create a Pattern with the value of a Fluent source translation.
+
+    The key names a Message (`id`), a Message attribute (`id.attribute`) or
+    a Term (`-id`). Term attributes cannot be used as a source because they
+    are internal to the localization.
+
+    All of the behavior is inherited from `FluentSource`.
+    """
+
+
+class TransformPattern(FluentSource, Transformer):
+    """Base class for migrations which rewrite a Fluent source pattern.
+
+    Subclasses provide the `visit_*` methods of the Transformer interface
+    which perform the actual changes.
+    """
+
+    def __call__(self, ctx):
+        # Clone the source Pattern first, then let the visitor rewrite it.
+        return self.visit(super().__call__(ctx))
+
+    def visit_Pattern(self, node):
+        """Re-normalize the Pattern once its children have been visited."""
+        # Restructuring transforms may leave nested Patterns or unnormalized
+        # text behind; pattern_of turns them into a valid Pattern again.
+        visited = self.generic_visit(node)
+        return Transform.pattern_of(*visited.elements)
+
+    def visit_Placeable(self, node):
+        """Unwrap a Placeable whose expression was replaced by a transform."""
+        # The expression may have been replaced with a Pattern or with a
+        # PatternElement. Pass such a replacement through in place of the
+        # Placeable; visit_Pattern flattens nested Patterns afterwards.
+        visited = self.generic_visit(node)
+        expression = visited.expression
+        if isinstance(expression, (FTL.Pattern, FTL.PatternElement)):
+            return expression
+        return visited
+
+
+class LegacySource(Source):
+    """Declare a legacy (non-Fluent) translation to be migrated.
+
+    Evaluating a `LegacySource` returns a TextElement holding the content
+    of the source translation. Escaped characters are unescaped by the
+    compare-locales parser according to the file format:
+
+      - in properties files: \\uXXXX,
+      - in DTD files: known named, decimal, and hexadecimal HTML entities.
+
+    The known named HTML entities are listed in:
+
+    https://github.com/python/cpython/blob/2.7/Lib/htmlentitydefs.py
+    https://github.com/python/cpython/blob/3.6/Lib/html/entities.py
+
+    Unless `trim=False` is passed, whitespace at the start and end of each
+    line is removed, as are leading and trailing empty lines.
+    """
+
+    def __init__(self, path, key, trim=None):
+        if path.endswith(".ftl"):
+            message = "Please use COPY_PATTERN to migrate from Fluent files ({})"
+            raise NotSupportedError(message.format(path))
+
+        super().__init__(path, key)
+        # None means "not set explicitly"; CONCAT may then turn trimming off.
+        self.trim = trim
+
+    def get_text(self, ctx):
+        # Look the raw translation up in the migration context.
+        return ctx.get_legacy_source(self.path, self.key)
+
+    @staticmethod
+    def trim_text(text):
+        """Strip per-line edge whitespace and surrounding empty lines."""
+        # Spaces and tabs at the start, then at the end, of every line.
+        for edge in ("^[ \t]+", "[ \t]+$"):
+            text = re.sub(edge, "", text, flags=re.M)
+        # Line breaks at either end of the whole text.
+        return text.strip("\r\n")
+
+    def __call__(self, ctx):
+        """Return the (optionally trimmed) source text as a TextElement."""
+        text = self.get_text(ctx)
+        # Trimming is on by default: only an explicit False disables it.
+        if self.trim is not False:
+            text = self.trim_text(text)
+        return FTL.TextElement(text)
+
+
+class COPY(LegacySource):
+    """Create a Pattern from the text of a legacy source translation."""
+
+    def __call__(self, ctx):
+        # LegacySource yields a bare TextElement; normalize it to a Pattern.
+        return Transform.pattern_of(super().__call__(ctx))
+
+
+PRINTF = re.compile(
+    r"%(?P<good>%|"  # "%" followed by another "%", or by a conversion:
+    r"(?:(?P<number>[1-9][0-9]*)\$)?"  # optional argument number, e.g. 1$
+    r"(?P<width>\*|[0-9]+)?"  # optional width: "*" or digits
+    r"(?P<prec>\.(?:\*|[0-9]+)?)?"  # optional precision: "." then "*" or digits
+    r"(?P<spec>[duxXosScpfg]))"  # conversion specifier; also closes `good`
+)
+
+
+def number():  # an endless counter: yields 1, 2, 3, ...
+    latest = 0
+    while True:
+        latest += 1
+        yield latest
+
+
+def normalize_printf(text):
+    """Rewrite the printf arguments in `text` so that all are numbered.
+
+    Gecko forbids mixing numbered and unnumbered arguments, so only the
+    unnumbered ones are converted. `%%` becomes `%`, and zero-width arguments
+    are removed, as localizers use them to drop an argument from the output.
+    """
+    counter = number()
+
+    def rewrite(match):
+        if match.group("good") == "%":
+            return "%"
+        if match.group("number"):
+            replacement = match.group()
+        else:
+            replacement = "%{}${}".format(next(counter), match.group("spec"))
+        return "" if match.group("width") == "0" else replacement
+
+    return PRINTF.sub(rewrite, text)
+
+
+class REPLACE_IN_TEXT(Transform):
+    """Turn a TextElement into a Pattern, swapping in FTL replacements.
+
+    `replacements` maps each legacy placeable found in the text to the FTL
+    Pattern, Placeable, TextElement or Expression which takes its place.
+    With `normalize_printf=True`, the text is first passed through
+    `normalize_printf()`. For example, to replace the `#1` placeable:
+
+        REPLACE_IN_TEXT(text, {'#1': VARIABLE_REFERENCE('num')})
+    """
+
+    def __init__(self, element, replacements, normalize_printf=False):
+        # `element` is the TextElement to process; `replacements` maps
+        # legacy placeables to their FTL replacements.
+        self.element = element
+        self.replacements = replacements
+        self.normalize_printf = normalize_printf
+
+    def __call__(self, ctx):
+        """Evaluate to a Pattern in which the known placeables are replaced."""
+        # The text to work on, with its printf arguments numbered on request.
+        value = self.element.value
+        if self.normalize_printf:
+            value = normalize_printf(value)
+
+        # Record, for every position at which a legacy placeable starts,
+        # the key found there. Keys missing from the text add nothing; if
+        # two keys start at the same position, the key iterated last wins.
+        keys_indexed = {}
+        for key in self.replacements.keys():
+            for found in re.finditer(re.escape(key), value):
+                keys_indexed[found.start()] = key
+
+        # Visit the placeables in the order in which they occur in the
+        # text, not in the order of the `replacements` dict.
+        ordered = sorted(keys_indexed.items(), key=lambda item: item[0])
+
+        # PatternElements built from the legacy text and the replacements.
+        # Empty and adjacent TextElements are fine here: pattern_of joins
+        # and prunes them.
+        elements = []
+        tail = value
+
+        # Each step cuts the remaining text at the placeable, keeps the text
+        # before it as a TextElement and appends the evaluated replacement.
+        # A replacement is evaluated once per occurrence of its placeable.
+        for _position, key in ordered:
+            node = ctx.evaluate(self.replacements[key])
+            before, _separator, tail = tail.partition(key)
+            elements.append(FTL.TextElement(before))
+            elements.append(node)
+
+        # Whatever follows the last placeable.
+        elements.append(FTL.TextElement(tail))
+        return Transform.pattern_of(*elements)
+
+
+class REPLACE(LegacySource):
+    """Create a Pattern from a legacy source, replacing its placeables.
+
+    The legacy interpolations in the source translation are replaced with
+    FTL placeables by means of the `REPLACE_IN_TEXT` transform.
+    """
+
+    def __init__(self, path, key, replacements, **kwargs):
+        # An explicit `normalize_printf` keyword wins and must not reach
+        # LegacySource; otherwise it is on for .properties files only.
+        if "normalize_printf" in kwargs:
+            normalize_printf = kwargs.pop("normalize_printf")
+        else:
+            normalize_printf = path.endswith(".properties")
+
+        super().__init__(path, key, **kwargs)
+        self.replacements = replacements
+        self.normalize_printf = normalize_printf
+
+    def __call__(self, ctx):
+        # Fetch (and trim) the source text, then delegate the replacing.
+        text_element = super().__call__(ctx)
+        replace = REPLACE_IN_TEXT(
+            text_element, self.replacements, normalize_printf=self.normalize_printf
+        )
+        return replace(ctx)
+
+
+class PLURALS(LegacySource):
+    """Create a Pattern with plural variants from a legacy source.
+
+    The source translation is expected to be a semicolon-separated list of
+    plural forms. Every form is wrapped in a TextElement and passed to
+    `foreach`, which should return an `FTL.Node` or a `Transform`; by
+    default, `foreach` builds a valid Pattern from the TextElement. The
+    results become the variants of an `FTL.SelectExpression` built with the
+    supplied `selector`.
+    """
+
+    DEFAULT_ORDER = ("zero", "one", "two", "few", "many", "other")
+
+    def __init__(self, path, key, selector, foreach=Transform.pattern_of, **kwargs):
+        super().__init__(path, key, **kwargs)
+        self.selector = selector
+        self.foreach = foreach
+
+    def __call__(self, ctx):
+        """Evaluate to a Pattern holding the variants, or the only form."""
+        element = super().__call__(ctx)
+        selector = ctx.evaluate(self.selector)
+        keys = ctx.plural_categories
+        forms = [FTL.TextElement(part.strip()) for part in element.value.split(";")]
+
+        # The default variant is the last CLDR category which the locale
+        # uses: usually `other`, but in some cases `many`.
+        in_use = [name for name in reversed(self.DEFAULT_ORDER) if name in keys]
+        default_key = in_use[0]
+
+        # Pair the categories with the legacy forms in the order in which
+        # they are defined in Gecko's PluralForm.jsm, skipping empty forms.
+        pairs = [(name, form) for name, form in zip(keys, forms) if form.value]
+
+        # The legacy translation doesn't define any plural forms at all.
+        if not pairs:
+            return Transform.pattern_of()
+
+        # With a single category or a single legacy form there is nothing
+        # to select from, so no SelectExpression is needed.
+        if len(pairs) == 1:
+            only_form = pairs[0][1]
+            return Transform.pattern_of(ctx.evaluate(self.foreach(only_form)))
+
+        # Make sure that the default variant exists. If the legacy
+        # translation has no form for it, reuse the form of the last
+        # category, in CLDR order, which it does define. That form is not
+        # necessarily the last one in the legacy translation.
+        pairs.sort(key=lambda pair: self.DEFAULT_ORDER.index(pair[0]))
+        last_key, last_form = pairs[-1]
+        if last_key != default_key:
+            pairs.append((default_key, last_form))
+
+        # Run every legacy form through `foreach`, which describes the
+        # transformation needed for the variant, and evaluate the result to
+        # a migrated FTL node.
+        variants = []
+        for name, form in pairs:
+            value = ctx.evaluate(self.foreach(form))
+            variants.append(
+                FTL.Variant(
+                    key=FTL.Identifier(name),
+                    value=value,
+                    default=name == default_key,
+                )
+            )
+
+        select = FTL.SelectExpression(selector=selector, variants=variants)
+        return Transform.pattern_of(select)
+
+
+class CONCAT(Transform):
+    """Join Patterns, PatternElements and Expressions into a new Pattern.
+
+    When given two or more elements, `CONCAT` turns off trimming for those
+    of them which are instances of `LegacySource`, by setting `trim=False`
+    on them. Elements whose `trim` has been set explicitly are left alone.
+    These two `CONCAT` calls are therefore equivalent:
+
+       CONCAT(
+           FTL.TextElement("Hello"),
+           COPY("file.properties", "hello")
+       )
+
+       CONCAT(
+           FTL.TextElement("Hello"),
+           COPY("file.properties", "hello", trim=False)
+       )
+
+    Pass `trim=True` to keep the element trimmed:
+
+       CONCAT(
+           FTL.TextElement("Hello "),
+           COPY("file.properties", "hello", trim=True)
+       )
+
+    When given a single element, `CONCAT` does not touch its trimming
+    behavior, even if it is a `LegacySource`. These two transforms are
+    therefore equivalent:
+
+       CONCAT(COPY("file.properties", "hello"))
+
+       COPY("file.properties", "hello")
+    """
+
+    def __init__(self, *elements, **kwargs):
+        # Migration specs pass the elements positionally, whereas
+        # FTL.BaseNode.traverse recreates a traversed node from its
+        # attributes, that is with an `elements=[...]` keyword argument.
+        self.elements = list(kwargs.get("elements", elements))
+
+        # CONCAT(COPY()) must stay equivalent to COPY() so that wrapping a
+        # transform in a CONCAT is always a no-op; transforms_from relies
+        # on that. A single element hence keeps its trimming behavior.
+        if len(self.elements) <= 1:
+            return
+        for elem in self.elements:
+            # An explicitly set `trim` is never overridden.
+            if isinstance(elem, LegacySource) and elem.trim is None:
+                elem.trim = False
+
+    def __call__(self, ctx):
+        return Transform.pattern_of(*self.elements)