1 files changed, 683 insertions, 0 deletions
diff --git a/third_party/python/fluent.syntax/fluent/syntax/parser.py b/third_party/python/fluent.syntax/fluent/syntax/parser.py
new file mode 100644
index 0000000000..6731136cef
--- /dev/null
+++ b/third_party/python/fluent.syntax/fluent/syntax/parser.py
@@ -0,0 +1,683 @@
+from __future__ import unicode_literals
+import re
+from . import ast
+from .stream import EOF, EOL, FluentParserStream
+from .errors import ParseError
+
+
+def with_span(fn):
+    def decorated(self, ps, *args, **kwargs):
+        if not self.with_spans:
+            return fn(self, ps, *args, **kwargs)
+
+        start = ps.index
+        node = fn(self, ps, *args, **kwargs)
+
+        # Don't re-add the span if the node already has it. This may happen
+        # when one decorated function calls another decorated function.
+        if node.span is not None:
+            return node
+
+        end = ps.index
+        node.add_span(start, end)
+        return node
+
+    return decorated
+
+
+class FluentParser(object):
+    """This class is used to parse Fluent source content.
+
+    ``with_spans`` enables source information in the form of
+    :class:`.ast.Span` objects for each :class:`.ast.SyntaxNode`.
+    """
+    def __init__(self, with_spans=True):
+        self.with_spans = with_spans
+
+    def parse(self, source):
+        """Create a :class:`.ast.Resource` from a Fluent source.
+        """
+        ps = FluentParserStream(source)
+        ps.skip_blank_block()
+
+        entries = []
+        last_comment = None
+
+        while ps.current_char:
+            entry = self.get_entry_or_junk(ps)
+            blank_lines = ps.skip_blank_block()
+
+            # Regular Comments require special logic. Comments may be attached
+            # to Messages or Terms if they are followed immediately by them.
+            # However they should parse as standalone when they're followed by
+            # Junk. Consequently, we only attach Comments once we know that the
+            # Message or the Term parsed successfully.
+            if isinstance(entry, ast.Comment) and len(blank_lines) == 0 \
+                    and ps.current_char:
+                # Stash the comment and decide what to do with it
+                # in the next pass.
+                last_comment = entry
+                continue
+
+            if last_comment is not None:
+                if isinstance(entry, (ast.Message, ast.Term)):
+                    entry.comment = last_comment
+                    if self.with_spans:
+                        entry.span.start = entry.comment.span.start
+                else:
+                    entries.append(last_comment)
+                # In either case, the stashed comment has been dealt with;
+                # clear it.
+                last_comment = None
+
+            entries.append(entry)
+
+        res = ast.Resource(entries)
+
+        if self.with_spans:
+            res.add_span(0, ps.index)
+
+        return res
+
+    def parse_entry(self, source):
+        """Parse the first :class:`.ast.Entry` in source.
+
+        Skip all encountered comments and start parsing at the first :class:`.ast.Message`
+        or :class:`.ast.Term` start. Return :class:`.ast.Junk` if the parsing is not successful.
+
+        Preceding comments are ignored unless they contain syntax errors
+        themselves, in which case :class:`.ast.Junk` for the invalid comment is returned.
+        """
+        ps = FluentParserStream(source)
+        ps.skip_blank_block()
+
+        while ps.current_char == '#':
+            skipped = self.get_entry_or_junk(ps)
+            if isinstance(skipped, ast.Junk):
+                # Don't skip Junk comments.
+                return skipped
+            ps.skip_blank_block()
+
+        return self.get_entry_or_junk(ps)
+
+    def get_entry_or_junk(self, ps):
+        entry_start_pos = ps.index
+
+        try:
+            entry = self.get_entry(ps)
+            ps.expect_line_end()
+            return entry
+        except ParseError as err:
+            error_index = ps.index
+            ps.skip_to_next_entry_start(entry_start_pos)
+            next_entry_start = ps.index
+            if next_entry_start < error_index:
+                # The position of the error must be inside of the Junk's span.
+                error_index = next_entry_start
+
+            # Create a Junk instance
+            slice = ps.string[entry_start_pos:next_entry_start]
+            junk = ast.Junk(slice)
+            if self.with_spans:
+                junk.add_span(entry_start_pos, next_entry_start)
+            annot = ast.Annotation(err.code, err.args, err.message)
+            annot.add_span(error_index, error_index)
+            junk.add_annotation(annot)
+            return junk
+
+    def get_entry(self, ps):
+        if ps.current_char == '#':
+            return self.get_comment(ps)
+
+        if ps.current_char == '-':
+            return self.get_term(ps)
+
+        if ps.is_identifier_start():
+            return self.get_message(ps)
+
+        raise ParseError('E0002')
+
+    @with_span
+    def get_comment(self, ps):
+        # 0 - comment
+        # 1 - group comment
+        # 2 - resource comment
+        level = -1
+        content = ''
+
+        while True:
+            i = -1
+            while ps.current_char == '#' \
+                    and (i < (2 if level == -1 else level)):
+                ps.next()
+                i += 1
+
+            if level == -1:
+                level = i
+
+            if ps.current_char != EOL:
+                ps.expect_char(' ')
+                ch = ps.take_char(lambda x: x != EOL)
+                while ch:
+                    content += ch
+                    ch = ps.take_char(lambda x: x != EOL)
+
+            if ps.is_next_line_comment(level=level):
+                content += ps.current_char
+                ps.next()
+            else:
+                break
+
+        if level == 0:
+            return ast.Comment(content)
+        elif level == 1:
+            return ast.GroupComment(content)
+        elif level == 2:
+            return ast.ResourceComment(content)
+
+    @with_span
+    def get_message(self, ps):
+        id = self.get_identifier(ps)
+        ps.skip_blank_inline()
+        ps.expect_char('=')
+
+        value = self.maybe_get_pattern(ps)
+        attrs = self.get_attributes(ps)
+
+        if value is None and len(attrs) == 0:
+            raise ParseError('E0005', id.name)
+
+        return ast.Message(id, value, attrs)
+
+    @with_span
+    def get_term(self, ps):
+        ps.expect_char('-')
+        id = self.get_identifier(ps)
+
+        ps.skip_blank_inline()
+        ps.expect_char('=')
+
+        value = self.maybe_get_pattern(ps)
+        if value is None:
+            raise ParseError('E0006', id.name)
+
+        attrs = self.get_attributes(ps)
+        return ast.Term(id, value, attrs)
+
+    @with_span
+    def get_attribute(self, ps):
+        ps.expect_char('.')
+
+        key = self.get_identifier(ps)
+
+        ps.skip_blank_inline()
+        ps.expect_char('=')
+
+        value = self.maybe_get_pattern(ps)
+        if value is None:
+            raise ParseError('E0012')
+
+        return ast.Attribute(key, value)
+
+    def get_attributes(self, ps):
+        attrs = []
+        ps.peek_blank()
+
+        while ps.is_attribute_start():
+            ps.skip_to_peek()
+            attr = self.get_attribute(ps)
+            attrs.append(attr)
+            ps.peek_blank()
+
+        return attrs
+
+    @with_span
+    def get_identifier(self, ps):
+        name = ps.take_id_start()
+        ch = ps.take_id_char()
+        while ch:
+            name += ch
+            ch = ps.take_id_char()
+
+        return ast.Identifier(name)
+
+    def get_variant_key(self, ps):
+        ch = ps.current_char
+
+        if ch is EOF:
+            raise ParseError('E0013')
+
+        cc = ord(ch)
+        if ((cc >= 48 and cc <= 57) or cc == 45):  # 0-9, -
+            return self.get_number(ps)
+
+        return self.get_identifier(ps)
+
+    @with_span
+    def get_variant(self, ps, has_default):
+        default_index = False
+
+        if ps.current_char == '*':
+            if has_default:
+                raise ParseError('E0015')
+            ps.next()
+            default_index = True
+
+        ps.expect_char('[')
+        ps.skip_blank()
+
+        key = self.get_variant_key(ps)
+
+        ps.skip_blank()
+        ps.expect_char(']')
+
+        value = self.maybe_get_pattern(ps)
+        if value is None:
+            raise ParseError('E0012')
+
+        return ast.Variant(key, value, default_index)
+
+    def get_variants(self, ps):
+        variants = []
+        has_default = False
+
+        ps.skip_blank()
+        while ps.is_variant_start():
+            variant = self.get_variant(ps, has_default)
+
+            if variant.default:
+                has_default = True
+
+            variants.append(variant)
+            ps.expect_line_end()
+            ps.skip_blank()
+
+        if len(variants) == 0:
+            raise ParseError('E0011')
+
+        if not has_default:
+            raise ParseError('E0010')
+
+        return variants
+
+    def get_digits(self, ps):
+        num = ''
+
+        ch = ps.take_digit()
+        while ch:
+            num += ch
+            ch = ps.take_digit()
+
+        if len(num) == 0:
+            raise ParseError('E0004', '0-9')
+
+        return num
+
+    @with_span
+    def get_number(self, ps):
+        num = ''
+
+        if ps.current_char == '-':
+            num += '-'
+            ps.next()
+
+        num += self.get_digits(ps)
+
+        if ps.current_char == '.':
+            num += '.'
+            ps.next()
+            num += self.get_digits(ps)
+
+        return ast.NumberLiteral(num)
+
+    def maybe_get_pattern(self, ps):
+        '''Parse an inline or a block Pattern, or None
+
+        maybe_get_pattern distinguishes between patterns which start on the
+        same line as the indentifier (aka inline singleline patterns and inline
+        multiline patterns), and patterns which start on a new line (aka block
+        patterns). The distinction is important for the dedentation logic: the
+        indent of the first line of a block pattern must be taken into account
+        when calculating the maximum common indent.
+        '''
+        ps.peek_blank_inline()
+        if ps.is_value_start():
+            ps.skip_to_peek()
+            return self.get_pattern(ps, is_block=False)
+
+        ps.peek_blank_block()
+        if ps.is_value_continuation():
+            ps.skip_to_peek()
+            return self.get_pattern(ps, is_block=True)
+
+        return None
+
+    @with_span
+    def get_pattern(self, ps, is_block):
+        elements = []
+        if is_block:
+            # A block pattern is a pattern which starts on a new line. Measure
+            # the indent of this first line for the dedentation logic.
+            blank_start = ps.index
+            first_indent = ps.skip_blank_inline()
+            elements.append(self.Indent(first_indent, blank_start, ps.index))
+            common_indent_length = len(first_indent)
+        else:
+            common_indent_length = float('infinity')
+
+        while ps.current_char:
+            if ps.current_char == EOL:
+                blank_start = ps.index
+                blank_lines = ps.peek_blank_block()
+                if ps.is_value_continuation():
+                    ps.skip_to_peek()
+                    indent = ps.skip_blank_inline()
+                    common_indent_length = min(common_indent_length, len(indent))
+                    elements.append(self.Indent(blank_lines + indent, blank_start, ps.index))
+                    continue
+
+                # The end condition for get_pattern's while loop is a newline
+                # which is not followed by a valid pattern continuation.
+                ps.reset_peek()
+                break
+
+            if ps.current_char == '}':
+                raise ParseError('E0027')
+
+            if ps.current_char == '{':
+                element = self.get_placeable(ps)
+            else:
+                element = self.get_text_element(ps)
+
+            elements.append(element)
+
+        dedented = self.dedent(elements, common_indent_length)
+        return ast.Pattern(dedented)
+
+    class Indent(ast.SyntaxNode):
+        def __init__(self, value, start, end):
+            super(FluentParser.Indent, self).__init__()
+            self.value = value
+            self.add_span(start, end)
+
+    def dedent(self, elements, common_indent):
+        '''Dedent a list of elements by removing the maximum common indent from
+        the beginning of text lines. The common indent is calculated in
+        get_pattern.
+        '''
+        trimmed = []
+
+        for element in elements:
+            if isinstance(element, ast.Placeable):
+                trimmed.append(element)
+                continue
+
+            if isinstance(element, self.Indent):
+                # Strip the common indent.
+                element.value = element.value[:len(element.value) - common_indent]
+                if len(element.value) == 0:
+                    continue
+
+            prev = trimmed[-1] if len(trimmed) > 0 else None
+            if isinstance(prev, ast.TextElement):
+                # Join adjacent TextElements by replacing them with their sum.
+                sum = ast.TextElement(prev.value + element.value)
+                if self.with_spans:
+                    sum.add_span(prev.span.start, element.span.end)
+                trimmed[-1] = sum
+                continue
+
+            if isinstance(element, self.Indent):
+                # If the indent hasn't been merged into a preceding
+                # TextElements, convert it into a new TextElement.
+                text_element = ast.TextElement(element.value)
+                if self.with_spans:
+                    text_element.add_span(element.span.start, element.span.end)
+                element = text_element
+
+            trimmed.append(element)
+
+        # Trim trailing whitespace from the Pattern.
+        last_element = trimmed[-1] if len(trimmed) > 0 else None
+        if isinstance(last_element, ast.TextElement):
+            last_element.value = last_element.value.rstrip(' \t\n\r')
+            if last_element.value == "":
+                trimmed.pop()
+
+        return trimmed
+
+    @with_span
+    def get_text_element(self, ps):
+        buf = ''
+
+        while ps.current_char:
+            ch = ps.current_char
+
+            if ch == '{' or ch == '}':
+                return ast.TextElement(buf)
+
+            if ch == EOL:
+                return ast.TextElement(buf)
+
+            buf += ch
+            ps.next()
+
+        return ast.TextElement(buf)
+
+    def get_escape_sequence(self, ps):
+        next = ps.current_char
+
+        if next == '\\' or next == '"':
+            ps.next()
+            return '\\{}'.format(next)
+
+        if next == 'u':
+            return self.get_unicode_escape_sequence(ps, next, 4)
+
+        if next == 'U':
+            return self.get_unicode_escape_sequence(ps, next, 6)
+
+        raise ParseError('E0025', next)
+
+    def get_unicode_escape_sequence(self, ps, u, digits):
+        ps.expect_char(u)
+        sequence = ''
+        for _ in range(digits):
+            ch = ps.take_hex_digit()
+            if not ch:
+                raise ParseError('E0026', '\\{}{}{}'.format(u, sequence, ps.current_char))
+            sequence += ch
+
+        return '\\{}{}'.format(u, sequence)
+
+    @with_span
+    def get_placeable(self, ps):
+        ps.expect_char('{')
+        ps.skip_blank()
+        expression = self.get_expression(ps)
+        ps.expect_char('}')
+        return ast.Placeable(expression)
+
+    @with_span
+    def get_expression(self, ps):
+        selector = self.get_inline_expression(ps)
+
+        ps.skip_blank()
+
+        if ps.current_char == '-':
+            if ps.peek() != '>':
+                ps.reset_peek()
+                return selector
+
+            if isinstance(selector, ast.MessageReference):
+                if selector.attribute is None:
+                    raise ParseError('E0016')
+                else:
+                    raise ParseError('E0018')
+
+            elif (
+                isinstance(selector, ast.TermReference)
+            ):
+                if selector.attribute is None:
+                    raise ParseError('E0017')
+            elif not (
+                isinstance(selector, (
+                    ast.StringLiteral,
+                    ast.NumberLiteral,
+                    ast.VariableReference,
+                    ast.FunctionReference,
+                ))
+            ):
+                raise ParseError('E0029')
+
+            ps.next()
+            ps.next()
+
+            ps.skip_blank_inline()
+            ps.expect_line_end()
+
+            variants = self.get_variants(ps)
+            return ast.SelectExpression(selector, variants)
+
+        if (
+            isinstance(selector, ast.TermReference)
+            and selector.attribute is not None
+        ):
+            raise ParseError('E0019')
+
+        return selector
+
+    @with_span
+    def get_inline_expression(self, ps):
+        if ps.current_char == '{':
+            return self.get_placeable(ps)
+
+        if ps.is_number_start():
+            return self.get_number(ps)
+
+        if ps.current_char == '"':
+            return self.get_string(ps)
+
+        if ps.current_char == '$':
+            ps.next()
+            id = self.get_identifier(ps)
+            return ast.VariableReference(id)
+
+        if ps.current_char == '-':
+            ps.next()
+            id = self.get_identifier(ps)
+            attribute = None
+            if ps.current_char == '.':
+                ps.next()
+                attribute = self.get_identifier(ps)
+            arguments = None
+            ps.peek_blank()
+            if ps.current_peek == '(':
+                ps.skip_to_peek()
+                arguments = self.get_call_arguments(ps)
+            return ast.TermReference(id, attribute, arguments)
+
+        if ps.is_identifier_start():
+            id = self.get_identifier(ps)
+            ps.peek_blank()
+
+            if ps.current_peek == '(':
+                # It's a Function. Ensure it's all upper-case.
+                if not re.match('^[A-Z][A-Z0-9_-]*$', id.name):
+                    raise ParseError('E0008')
+                ps.skip_to_peek()
+                args = self.get_call_arguments(ps)
+                return ast.FunctionReference(id, args)
+
+            attribute = None
+            if ps.current_char == '.':
+                ps.next()
+                attribute = self.get_identifier(ps)
+
+            return ast.MessageReference(id, attribute)
+
+        raise ParseError('E0028')
+
+    @with_span
+    def get_call_argument(self, ps):
+        exp = self.get_inline_expression(ps)
+
+        ps.skip_blank()
+
+        if ps.current_char != ':':
+            return exp
+
+        if isinstance(exp, ast.MessageReference) and exp.attribute is None:
+            ps.next()
+            ps.skip_blank()
+
+            value = self.get_literal(ps)
+            return ast.NamedArgument(exp.id, value)
+
+        raise ParseError('E0009')
+
+    @with_span
+    def get_call_arguments(self, ps):
+        positional = []
+        named = []
+        argument_names = set()
+
+        ps.expect_char('(')
+        ps.skip_blank()
+
+        while True:
+            if ps.current_char == ')':
+                break
+
+            arg = self.get_call_argument(ps)
+            if isinstance(arg, ast.NamedArgument):
+                if arg.name.name in argument_names:
+                    raise ParseError('E0022')
+                named.append(arg)
+                argument_names.add(arg.name.name)
+            elif len(argument_names) > 0:
+                raise ParseError('E0021')
+            else:
+                positional.append(arg)
+
+            ps.skip_blank()
+
+            if ps.current_char == ',':
+                ps.next()
+                ps.skip_blank()
+                continue
+
+            break
+
+        ps.expect_char(')')
+        return ast.CallArguments(positional, named)
+
+    @with_span
+    def get_string(self, ps):
+        value = ''
+
+        ps.expect_char('"')
+
+        while True:
+            ch = ps.take_char(lambda x: x != '"' and x != EOL)
+            if not ch:
+                break
+            if ch == '\\':
+                value += self.get_escape_sequence(ps)
+            else:
+                value += ch
+
+        if ps.current_char == EOL:
+            raise ParseError('E0020')
+
+        ps.expect_char('"')
+
+        return ast.StringLiteral(value)
+
+    @with_span
+    def get_literal(self, ps):
+        if ps.is_number_start():
+            return self.get_number(ps)
+        if ps.current_char == '"':
+            return self.get_string(ps)
+        raise ParseError('E0014')