Diffstat (limited to 'third_party/python/fluent.syntax/fluent/syntax/parser.py')
-rw-r--r-- | third_party/python/fluent.syntax/fluent/syntax/parser.py | 701
1 files changed, 701 insertions, 0 deletions
diff --git a/third_party/python/fluent.syntax/fluent/syntax/parser.py b/third_party/python/fluent.syntax/fluent/syntax/parser.py new file mode 100644 index 0000000000..87075409f1 --- /dev/null +++ b/third_party/python/fluent.syntax/fluent/syntax/parser.py @@ -0,0 +1,701 @@ +import re +from typing import Any, Callable, List, Set, TypeVar, Union, cast +from . import ast +from .stream import EOL, FluentParserStream +from .errors import ParseError + +R = TypeVar("R", bound=ast.SyntaxNode) + + +def with_span(fn: Callable[..., R]) -> Callable[..., R]: + def decorated(self: 'FluentParser', ps: FluentParserStream, *args: Any, **kwargs: Any) -> Any: + if not self.with_spans: + return fn(self, ps, *args, **kwargs) + + start = ps.index + node = fn(self, ps, *args, **kwargs) + + # Don't re-add the span if the node already has it. This may happen + # when one decorated function calls another decorated function. + if node.span is not None: + return node + + end = ps.index + node.add_span(start, end) + return node + + return decorated + + +class FluentParser: + """This class is used to parse Fluent source content. + + ``with_spans`` enables source information in the form of + :class:`.ast.Span` objects for each :class:`.ast.SyntaxNode`. + """ + + def __init__(self, with_spans: bool = True): + self.with_spans = with_spans + + def parse(self, source: str) -> ast.Resource: + """Create a :class:`.ast.Resource` from a Fluent source. + """ + ps = FluentParserStream(source) + ps.skip_blank_block() + + entries: List[ast.EntryType] = [] + last_comment = None + + while ps.current_char: + entry = self.get_entry_or_junk(ps) + blank_lines = ps.skip_blank_block() + + # Regular Comments require special logic. Comments may be attached + # to Messages or Terms if they are followed immediately by them. + # However they should parse as standalone when they're followed by + # Junk. Consequently, we only attach Comments once we know that the + # Message or the Term parsed successfully. + if isinstance(entry, ast.Comment) and len(blank_lines) == 0 \ + and ps.current_char: + # Stash the comment and decide what to do with it + # in the next pass. + last_comment = entry + continue + + if last_comment is not None: + if isinstance(entry, (ast.Message, ast.Term)): + entry.comment = last_comment + if self.with_spans: + cast(ast.Span, entry.span).start = cast(ast.Span, entry.comment.span).start + else: + entries.append(last_comment) + # In either case, the stashed comment has been dealt with; + # clear it. + last_comment = None + + entries.append(entry) + + res = ast.Resource(entries) + + if self.with_spans: + res.add_span(0, ps.index) + + return res + + def parse_entry(self, source: str) -> ast.EntryType: + """Parse the first :class:`.ast.Entry` in source. + + Skip all encountered comments and start parsing at the first :class:`.ast.Message` + or :class:`.ast.Term` start. Return :class:`.ast.Junk` if the parsing is not successful. + + Preceding comments are ignored unless they contain syntax errors + themselves, in which case :class:`.ast.Junk` for the invalid comment is returned. + """ + ps = FluentParserStream(source) + ps.skip_blank_block() + + while ps.current_char == '#': + skipped = self.get_entry_or_junk(ps) + if isinstance(skipped, ast.Junk): + # Don't skip Junk comments. 
+ return skipped + ps.skip_blank_block() + + return self.get_entry_or_junk(ps) + + def get_entry_or_junk(self, ps: FluentParserStream) -> ast.EntryType: + entry_start_pos = ps.index + + try: + entry = self.get_entry(ps) + ps.expect_line_end() + return entry + except ParseError as err: + error_index = ps.index + ps.skip_to_next_entry_start(entry_start_pos) + next_entry_start = ps.index + if next_entry_start < error_index: + # The position of the error must be inside of the Junk's span. + error_index = next_entry_start + + # Create a Junk instance + slice = ps.string[entry_start_pos:next_entry_start] + junk = ast.Junk(slice) + if self.with_spans: + junk.add_span(entry_start_pos, next_entry_start) + annot = ast.Annotation(err.code, list(err.args) if err.args else None, err.message) + annot.add_span(error_index, error_index) + junk.add_annotation(annot) + return junk + + def get_entry(self, ps: FluentParserStream) -> ast.EntryType: + if ps.current_char == '#': + return self.get_comment(ps) + + if ps.current_char == '-': + return self.get_term(ps) + + if ps.is_identifier_start(): + return self.get_message(ps) + + raise ParseError('E0002') + + @with_span + def get_comment(self, ps: FluentParserStream) -> Union[ast.Comment, ast.GroupComment, ast.ResourceComment]: + # 0 - comment + # 1 - group comment + # 2 - resource comment + level = -1 + content = '' + + while True: + i = -1 + while ps.current_char == '#' \ + and (i < (2 if level == -1 else level)): + ps.next() + i += 1 + + if level == -1: + level = i + + if ps.current_char != EOL: + ps.expect_char(' ') + ch = ps.take_char(lambda x: x != EOL) + while ch: + content += ch + ch = ps.take_char(lambda x: x != EOL) + + if ps.is_next_line_comment(level=level): + content += cast(str, ps.current_char) + ps.next() + else: + break + + if level == 0: + return ast.Comment(content) + elif level == 1: + return ast.GroupComment(content) + elif level == 2: + return ast.ResourceComment(content) + + # never happens if ps.current_char == '#' when called + return cast(ast.Comment, None) + + @with_span + def get_message(self, ps: FluentParserStream) -> ast.Message: + id = self.get_identifier(ps) + ps.skip_blank_inline() + ps.expect_char('=') + + value = self.maybe_get_pattern(ps) + attrs = self.get_attributes(ps) + + if value is None and len(attrs) == 0: + raise ParseError('E0005', id.name) + + return ast.Message(id, value, attrs) + + @with_span + def get_term(self, ps: FluentParserStream) -> ast.Term: + ps.expect_char('-') + id = self.get_identifier(ps) + + ps.skip_blank_inline() + ps.expect_char('=') + + value = self.maybe_get_pattern(ps) + if value is None: + raise ParseError('E0006', id.name) + + attrs = self.get_attributes(ps) + return ast.Term(id, value, attrs) + + @with_span + def get_attribute(self, ps: FluentParserStream) -> ast.Attribute: + ps.expect_char('.') + + key = self.get_identifier(ps) + + ps.skip_blank_inline() + ps.expect_char('=') + + value = self.maybe_get_pattern(ps) + if value is None: + raise ParseError('E0012') + + return ast.Attribute(key, value) + + def get_attributes(self, ps: FluentParserStream) -> List[ast.Attribute]: + attrs: List[ast.Attribute] = [] + ps.peek_blank() + + while ps.is_attribute_start(): + ps.skip_to_peek() + attr = self.get_attribute(ps) + attrs.append(attr) + ps.peek_blank() + + return attrs + + @with_span + def get_identifier(self, ps: FluentParserStream) -> ast.Identifier: + name = ps.take_id_start() + if name is None: + raise ParseError('E0004', 'a-zA-Z') + + ch = ps.take_id_char() + while ch: + name += ch + ch = 
ps.take_id_char() + + return ast.Identifier(name) + + def get_variant_key(self, ps: FluentParserStream) -> Union[ast.Identifier, ast.NumberLiteral]: + ch = ps.current_char + + if ch is None: + raise ParseError('E0013') + + cc = ord(ch) + if ((cc >= 48 and cc <= 57) or cc == 45): # 0-9, - + return self.get_number(ps) + + return self.get_identifier(ps) + + @with_span + def get_variant(self, ps: FluentParserStream, has_default: bool) -> ast.Variant: + default_index = False + + if ps.current_char == '*': + if has_default: + raise ParseError('E0015') + ps.next() + default_index = True + + ps.expect_char('[') + ps.skip_blank() + + key = self.get_variant_key(ps) + + ps.skip_blank() + ps.expect_char(']') + + value = self.maybe_get_pattern(ps) + if value is None: + raise ParseError('E0012') + + return ast.Variant(key, value, default_index) + + def get_variants(self, ps: FluentParserStream) -> List[ast.Variant]: + variants: List[ast.Variant] = [] + has_default = False + + ps.skip_blank() + while ps.is_variant_start(): + variant = self.get_variant(ps, has_default) + + if variant.default: + has_default = True + + variants.append(variant) + ps.expect_line_end() + ps.skip_blank() + + if len(variants) == 0: + raise ParseError('E0011') + + if not has_default: + raise ParseError('E0010') + + return variants + + def get_digits(self, ps: FluentParserStream) -> str: + num = '' + + ch = ps.take_digit() + while ch: + num += ch + ch = ps.take_digit() + + if len(num) == 0: + raise ParseError('E0004', '0-9') + + return num + + @with_span + def get_number(self, ps: FluentParserStream) -> ast.NumberLiteral: + num = '' + + if ps.current_char == '-': + num += '-' + ps.next() + + num += self.get_digits(ps) + + if ps.current_char == '.': + num += '.' + ps.next() + num += self.get_digits(ps) + + return ast.NumberLiteral(num) + + def maybe_get_pattern(self, ps: FluentParserStream) -> Union[ast.Pattern, None]: + '''Parse an inline or a block Pattern, or None + + maybe_get_pattern distinguishes between patterns which start on the + same line as the indentifier (aka inline singleline patterns and inline + multiline patterns), and patterns which start on a new line (aka block + patterns). The distinction is important for the dedentation logic: the + indent of the first line of a block pattern must be taken into account + when calculating the maximum common indent. + ''' + ps.peek_blank_inline() + if ps.is_value_start(): + ps.skip_to_peek() + return self.get_pattern(ps, is_block=False) + + ps.peek_blank_block() + if ps.is_value_continuation(): + ps.skip_to_peek() + return self.get_pattern(ps, is_block=True) + + return None + + @with_span + def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern: + elements: List[Any] = [] + if is_block: + # A block pattern is a pattern which starts on a new line. Measure + # the indent of this first line for the dedentation logic. 
+ blank_start = ps.index + first_indent = ps.skip_blank_inline() + elements.append(self.Indent(first_indent, blank_start, ps.index)) + common_indent_length = len(first_indent) + else: + # Should get fixed by the subsequent min() operation + common_indent_length = cast(int, float('infinity')) + + while ps.current_char: + if ps.current_char == EOL: + blank_start = ps.index + blank_lines = ps.peek_blank_block() + if ps.is_value_continuation(): + ps.skip_to_peek() + indent = ps.skip_blank_inline() + common_indent_length = min(common_indent_length, len(indent)) + elements.append(self.Indent(blank_lines + indent, blank_start, ps.index)) + continue + + # The end condition for get_pattern's while loop is a newline + # which is not followed by a valid pattern continuation. + ps.reset_peek() + break + + if ps.current_char == '}': + raise ParseError('E0027') + + element: Union[ast.TextElement, ast.Placeable] + if ps.current_char == '{': + element = self.get_placeable(ps) + else: + element = self.get_text_element(ps) + + elements.append(element) + + dedented = self.dedent(elements, common_indent_length) + return ast.Pattern(dedented) + + class Indent(ast.SyntaxNode): + def __init__(self, value: str, start: int, end: int): + super(FluentParser.Indent, self).__init__() + self.value = value + self.add_span(start, end) + + def dedent(self, + elements: List[Union[ast.TextElement, ast.Placeable, Indent]], + common_indent: int + ) -> List[Union[ast.TextElement, ast.Placeable]]: + '''Dedent a list of elements by removing the maximum common indent from + the beginning of text lines. The common indent is calculated in + get_pattern. + ''' + trimmed: List[Union[ast.TextElement, ast.Placeable]] = [] + + for element in elements: + if isinstance(element, ast.Placeable): + trimmed.append(element) + continue + + if isinstance(element, self.Indent): + # Strip the common indent. + element.value = element.value[:len(element.value) - common_indent] + if len(element.value) == 0: + continue + + prev = trimmed[-1] if len(trimmed) > 0 else None + if isinstance(prev, ast.TextElement): + # Join adjacent TextElements by replacing them with their sum. + sum = ast.TextElement(prev.value + element.value) + if self.with_spans: + sum.add_span(cast(ast.Span, prev.span).start, cast(ast.Span, element.span).end) + trimmed[-1] = sum + continue + + if isinstance(element, self.Indent): + # If the indent hasn't been merged into a preceding + # TextElements, convert it into a new TextElement. + text_element = ast.TextElement(element.value) + if self.with_spans: + text_element.add_span(cast(ast.Span, element.span).start, cast(ast.Span, element.span).end) + element = text_element + + trimmed.append(element) + + # Trim trailing whitespace from the Pattern. 
+ last_element = trimmed[-1] if len(trimmed) > 0 else None + if isinstance(last_element, ast.TextElement): + last_element.value = last_element.value.rstrip(' \n\r') + if last_element.value == "": + trimmed.pop() + + return trimmed + + @with_span + def get_text_element(self, ps: FluentParserStream) -> ast.TextElement: + buf = '' + + while ps.current_char: + ch = ps.current_char + + if ch == '{' or ch == '}': + return ast.TextElement(buf) + + if ch == EOL: + return ast.TextElement(buf) + + buf += ch + ps.next() + + return ast.TextElement(buf) + + def get_escape_sequence(self, ps: FluentParserStream) -> str: + next = ps.current_char + + if next == '\\' or next == '"': + ps.next() + return f'\\{next}' + + if next == 'u': + return self.get_unicode_escape_sequence(ps, next, 4) + + if next == 'U': + return self.get_unicode_escape_sequence(ps, next, 6) + + raise ParseError('E0025', next) + + def get_unicode_escape_sequence(self, ps: FluentParserStream, u: str, digits: int) -> str: + ps.expect_char(u) + sequence = '' + for _ in range(digits): + ch = ps.take_hex_digit() + if not ch: + raise ParseError('E0026', f'\\{u}{sequence}{ps.current_char}') + sequence += ch + + return f'\\{u}{sequence}' + + @with_span + def get_placeable(self, ps: FluentParserStream) -> ast.Placeable: + ps.expect_char('{') + ps.skip_blank() + expression = self.get_expression(ps) + ps.expect_char('}') + return ast.Placeable(expression) + + @with_span + def get_expression(self, ps: FluentParserStream) -> Union[ast.InlineExpression, + ast.Placeable, + ast.SelectExpression]: + selector = self.get_inline_expression(ps) + + ps.skip_blank() + + if ps.current_char == '-': + if ps.peek() != '>': + ps.reset_peek() + return selector + + if isinstance(selector, ast.MessageReference): + if selector.attribute is None: + raise ParseError('E0016') + else: + raise ParseError('E0018') + + elif ( + isinstance(selector, ast.TermReference) + ): + if selector.attribute is None: + raise ParseError('E0017') + elif not ( + isinstance(selector, ( + ast.StringLiteral, + ast.NumberLiteral, + ast.VariableReference, + ast.FunctionReference, + )) + ): + raise ParseError('E0029') + + ps.next() + ps.next() + + ps.skip_blank_inline() + ps.expect_line_end() + + variants = self.get_variants(ps) + return ast.SelectExpression(selector, variants) + + if ( + isinstance(selector, ast.TermReference) + and selector.attribute is not None + ): + raise ParseError('E0019') + + return selector + + @with_span + def get_inline_expression(self, ps: FluentParserStream) -> Union[ast.InlineExpression, ast.Placeable]: + if ps.current_char == '{': + return self.get_placeable(ps) + + if ps.is_number_start(): + return self.get_number(ps) + + if ps.current_char == '"': + return self.get_string(ps) + + if ps.current_char == '$': + ps.next() + id = self.get_identifier(ps) + return ast.VariableReference(id) + + if ps.current_char == '-': + ps.next() + id = self.get_identifier(ps) + attribute = None + if ps.current_char == '.': + ps.next() + attribute = self.get_identifier(ps) + arguments = None + ps.peek_blank() + if ps.current_peek == '(': + ps.skip_to_peek() + arguments = self.get_call_arguments(ps) + return ast.TermReference(id, attribute, arguments) + + if ps.is_identifier_start(): + id = self.get_identifier(ps) + ps.peek_blank() + + if ps.current_peek == '(': + # It's a Function. Ensure it's all upper-case. 
+ if not re.match('^[A-Z][A-Z0-9_-]*$', id.name): + raise ParseError('E0008') + ps.skip_to_peek() + args = self.get_call_arguments(ps) + return ast.FunctionReference(id, args) + + attribute = None + if ps.current_char == '.': + ps.next() + attribute = self.get_identifier(ps) + + return ast.MessageReference(id, attribute) + + raise ParseError('E0028') + + @with_span + def get_call_argument(self, + ps: FluentParserStream + ) -> Union[ast.InlineExpression, ast.NamedArgument, ast.Placeable]: + exp = self.get_inline_expression(ps) + + ps.skip_blank() + + if ps.current_char != ':': + return exp + + if isinstance(exp, ast.MessageReference) and exp.attribute is None: + ps.next() + ps.skip_blank() + + value = self.get_literal(ps) + return ast.NamedArgument(exp.id, value) + + raise ParseError('E0009') + + @with_span + def get_call_arguments(self, ps: FluentParserStream) -> ast.CallArguments: + positional: List[Union[ast.InlineExpression, ast.Placeable]] = [] + named: List[ast.NamedArgument] = [] + argument_names: Set[str] = set() + + ps.expect_char('(') + ps.skip_blank() + + while True: + if ps.current_char == ')': + break + + arg = self.get_call_argument(ps) + if isinstance(arg, ast.NamedArgument): + if arg.name.name in argument_names: + raise ParseError('E0022') + named.append(arg) + argument_names.add(arg.name.name) + elif len(argument_names) > 0: + raise ParseError('E0021') + else: + positional.append(arg) + + ps.skip_blank() + + if ps.current_char == ',': + ps.next() + ps.skip_blank() + continue + + break + + ps.expect_char(')') + return ast.CallArguments(positional, named) + + @with_span + def get_string(self, ps: FluentParserStream) -> ast.StringLiteral: + value = '' + + ps.expect_char('"') + + while True: + ch = ps.take_char(lambda x: x != '"' and x != EOL) + if not ch: + break + if ch == '\\': + value += self.get_escape_sequence(ps) + else: + value += ch + + if ps.current_char == EOL: + raise ParseError('E0020') + + ps.expect_char('"') + + return ast.StringLiteral(value) + + @with_span + def get_literal(self, ps: FluentParserStream) -> Union[ast.NumberLiteral, ast.StringLiteral]: + if ps.is_number_start(): + return self.get_number(ps) + if ps.current_char == '"': + return self.get_string(ps) + raise ParseError('E0014') |
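The parse() loop above attaches a Comment to a Message or Term only when no blank line separates them; otherwise the comment stays a standalone entry. A minimal usage sketch of that behaviour, not part of the diff, assuming the new file is importable as fluent.syntax.parser and that the companion fluent.syntax.ast module (imported above as "from . import ast") exposes Resource.body, Message.id/.comment and Comment.content as in the published fluent.syntax package:

from fluent.syntax.parser import FluentParser
from fluent.syntax import ast

source = """\
# Attached: no blank line before the next entry.
hello = Hello, world!

# Standalone: a blank line follows this comment.

-brand = Firefox
"""

parser = FluentParser(with_spans=True)
resource = parser.parse(source)

for entry in resource.body:
    if isinstance(entry, ast.Message):
        # parse() folded the preceding comment into Message.comment.
        print("message", entry.id.name, "comment:",
              entry.comment and entry.comment.content)
    elif isinstance(entry, ast.Comment):
        print("standalone comment:", entry.content)
    elif isinstance(entry, ast.Term):
        print("term", entry.id.name)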
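maybe_get_pattern() treats a value that starts on a new line as a block pattern; get_pattern() records each line's indent and dedent() strips the maximum common indent, trims trailing whitespace, and joins adjacent text into a single TextElement. A sketch of the observable result, under the same import assumptions as above:

from fluent.syntax.parser import FluentParser
from fluent.syntax import ast

source = (
    "multi =\n"
    "    First line\n"
    "      indented continuation\n"
)

message = FluentParser().parse_entry(source)
assert isinstance(message, ast.Message)

# The 4-space common indent is removed and the two text lines are joined;
# only the extra 2 spaces of the more-indented second line remain.
print(repr(message.value.elements[0].value))
# 'First line\n  indented continuation'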
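get_entry_or_junk() turns any ParseError into an ast.Junk entry spanning the recovered source slice, annotated with the error code and position. A sketch, again assuming the upstream fluent.syntax.ast attribute names (Junk.annotations, Annotation.code, Annotation.message):

from fluent.syntax.parser import FluentParser
from fluent.syntax import ast

broken = "key with no equals sign\n"
entry = FluentParser().parse_entry(broken)

if isinstance(entry, ast.Junk):
    # Each annotation reports one ParseError raised while parsing the entry.
    for annotation in entry.annotations:
        print(annotation.code, annotation.message)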