summaryrefslogtreecommitdiffstats
path: root/third_party/python/fluent.syntax/fluent/syntax/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/fluent.syntax/fluent/syntax/parser.py')
-rw-r--r--third_party/python/fluent.syntax/fluent/syntax/parser.py701
1 files changed, 701 insertions, 0 deletions
diff --git a/third_party/python/fluent.syntax/fluent/syntax/parser.py b/third_party/python/fluent.syntax/fluent/syntax/parser.py
new file mode 100644
index 0000000000..87075409f1
--- /dev/null
+++ b/third_party/python/fluent.syntax/fluent/syntax/parser.py
@@ -0,0 +1,701 @@
+import re
+from typing import Any, Callable, List, Set, TypeVar, Union, cast
+from . import ast
+from .stream import EOL, FluentParserStream
+from .errors import ParseError
+
+R = TypeVar("R", bound=ast.SyntaxNode)
+
+
+def with_span(fn: Callable[..., R]) -> Callable[..., R]:
+ def decorated(self: 'FluentParser', ps: FluentParserStream, *args: Any, **kwargs: Any) -> Any:
+ if not self.with_spans:
+ return fn(self, ps, *args, **kwargs)
+
+ start = ps.index
+ node = fn(self, ps, *args, **kwargs)
+
+ # Don't re-add the span if the node already has it. This may happen
+ # when one decorated function calls another decorated function.
+ if node.span is not None:
+ return node
+
+ end = ps.index
+ node.add_span(start, end)
+ return node
+
+ return decorated
+
+
+class FluentParser:
+ """This class is used to parse Fluent source content.
+
+ ``with_spans`` enables source information in the form of
+ :class:`.ast.Span` objects for each :class:`.ast.SyntaxNode`.
+ """
+
+ def __init__(self, with_spans: bool = True):
+ self.with_spans = with_spans
+
+ def parse(self, source: str) -> ast.Resource:
+ """Create a :class:`.ast.Resource` from a Fluent source.
+ """
+ ps = FluentParserStream(source)
+ ps.skip_blank_block()
+
+ entries: List[ast.EntryType] = []
+ last_comment = None
+
+ while ps.current_char:
+ entry = self.get_entry_or_junk(ps)
+ blank_lines = ps.skip_blank_block()
+
+ # Regular Comments require special logic. Comments may be attached
+ # to Messages or Terms if they are followed immediately by them.
+ # However they should parse as standalone when they're followed by
+ # Junk. Consequently, we only attach Comments once we know that the
+ # Message or the Term parsed successfully.
+ if isinstance(entry, ast.Comment) and len(blank_lines) == 0 \
+ and ps.current_char:
+ # Stash the comment and decide what to do with it
+ # in the next pass.
+ last_comment = entry
+ continue
+
+ if last_comment is not None:
+ if isinstance(entry, (ast.Message, ast.Term)):
+ entry.comment = last_comment
+ if self.with_spans:
+ cast(ast.Span, entry.span).start = cast(ast.Span, entry.comment.span).start
+ else:
+ entries.append(last_comment)
+ # In either case, the stashed comment has been dealt with;
+ # clear it.
+ last_comment = None
+
+ entries.append(entry)
+
+ res = ast.Resource(entries)
+
+ if self.with_spans:
+ res.add_span(0, ps.index)
+
+ return res
+
+ def parse_entry(self, source: str) -> ast.EntryType:
+ """Parse the first :class:`.ast.Entry` in source.
+
+ Skip all encountered comments and start parsing at the first :class:`.ast.Message`
+ or :class:`.ast.Term` start. Return :class:`.ast.Junk` if the parsing is not successful.
+
+ Preceding comments are ignored unless they contain syntax errors
+ themselves, in which case :class:`.ast.Junk` for the invalid comment is returned.
+ """
+ ps = FluentParserStream(source)
+ ps.skip_blank_block()
+
+ while ps.current_char == '#':
+ skipped = self.get_entry_or_junk(ps)
+ if isinstance(skipped, ast.Junk):
+ # Don't skip Junk comments.
+ return skipped
+ ps.skip_blank_block()
+
+ return self.get_entry_or_junk(ps)
+
+ def get_entry_or_junk(self, ps: FluentParserStream) -> ast.EntryType:
+ entry_start_pos = ps.index
+
+ try:
+ entry = self.get_entry(ps)
+ ps.expect_line_end()
+ return entry
+ except ParseError as err:
+ error_index = ps.index
+ ps.skip_to_next_entry_start(entry_start_pos)
+ next_entry_start = ps.index
+ if next_entry_start < error_index:
+ # The position of the error must be inside of the Junk's span.
+ error_index = next_entry_start
+
+ # Create a Junk instance
+ slice = ps.string[entry_start_pos:next_entry_start]
+ junk = ast.Junk(slice)
+ if self.with_spans:
+ junk.add_span(entry_start_pos, next_entry_start)
+ annot = ast.Annotation(err.code, list(err.args) if err.args else None, err.message)
+ annot.add_span(error_index, error_index)
+ junk.add_annotation(annot)
+ return junk
+
+ def get_entry(self, ps: FluentParserStream) -> ast.EntryType:
+ if ps.current_char == '#':
+ return self.get_comment(ps)
+
+ if ps.current_char == '-':
+ return self.get_term(ps)
+
+ if ps.is_identifier_start():
+ return self.get_message(ps)
+
+ raise ParseError('E0002')
+
+ @with_span
+ def get_comment(self, ps: FluentParserStream) -> Union[ast.Comment, ast.GroupComment, ast.ResourceComment]:
+ # 0 - comment
+ # 1 - group comment
+ # 2 - resource comment
+ level = -1
+ content = ''
+
+ while True:
+ i = -1
+ while ps.current_char == '#' \
+ and (i < (2 if level == -1 else level)):
+ ps.next()
+ i += 1
+
+ if level == -1:
+ level = i
+
+ if ps.current_char != EOL:
+ ps.expect_char(' ')
+ ch = ps.take_char(lambda x: x != EOL)
+ while ch:
+ content += ch
+ ch = ps.take_char(lambda x: x != EOL)
+
+ if ps.is_next_line_comment(level=level):
+ content += cast(str, ps.current_char)
+ ps.next()
+ else:
+ break
+
+ if level == 0:
+ return ast.Comment(content)
+ elif level == 1:
+ return ast.GroupComment(content)
+ elif level == 2:
+ return ast.ResourceComment(content)
+
+ # never happens if ps.current_char == '#' when called
+ return cast(ast.Comment, None)
+
+ @with_span
+ def get_message(self, ps: FluentParserStream) -> ast.Message:
+ id = self.get_identifier(ps)
+ ps.skip_blank_inline()
+ ps.expect_char('=')
+
+ value = self.maybe_get_pattern(ps)
+ attrs = self.get_attributes(ps)
+
+ if value is None and len(attrs) == 0:
+ raise ParseError('E0005', id.name)
+
+ return ast.Message(id, value, attrs)
+
+ @with_span
+ def get_term(self, ps: FluentParserStream) -> ast.Term:
+ ps.expect_char('-')
+ id = self.get_identifier(ps)
+
+ ps.skip_blank_inline()
+ ps.expect_char('=')
+
+ value = self.maybe_get_pattern(ps)
+ if value is None:
+ raise ParseError('E0006', id.name)
+
+ attrs = self.get_attributes(ps)
+ return ast.Term(id, value, attrs)
+
+ @with_span
+ def get_attribute(self, ps: FluentParserStream) -> ast.Attribute:
+ ps.expect_char('.')
+
+ key = self.get_identifier(ps)
+
+ ps.skip_blank_inline()
+ ps.expect_char('=')
+
+ value = self.maybe_get_pattern(ps)
+ if value is None:
+ raise ParseError('E0012')
+
+ return ast.Attribute(key, value)
+
+ def get_attributes(self, ps: FluentParserStream) -> List[ast.Attribute]:
+ attrs: List[ast.Attribute] = []
+ ps.peek_blank()
+
+ while ps.is_attribute_start():
+ ps.skip_to_peek()
+ attr = self.get_attribute(ps)
+ attrs.append(attr)
+ ps.peek_blank()
+
+ return attrs
+
+ @with_span
+ def get_identifier(self, ps: FluentParserStream) -> ast.Identifier:
+ name = ps.take_id_start()
+ if name is None:
+ raise ParseError('E0004', 'a-zA-Z')
+
+ ch = ps.take_id_char()
+ while ch:
+ name += ch
+ ch = ps.take_id_char()
+
+ return ast.Identifier(name)
+
+ def get_variant_key(self, ps: FluentParserStream) -> Union[ast.Identifier, ast.NumberLiteral]:
+ ch = ps.current_char
+
+ if ch is None:
+ raise ParseError('E0013')
+
+ cc = ord(ch)
+ if ((cc >= 48 and cc <= 57) or cc == 45): # 0-9, -
+ return self.get_number(ps)
+
+ return self.get_identifier(ps)
+
+ @with_span
+ def get_variant(self, ps: FluentParserStream, has_default: bool) -> ast.Variant:
+ default_index = False
+
+ if ps.current_char == '*':
+ if has_default:
+ raise ParseError('E0015')
+ ps.next()
+ default_index = True
+
+ ps.expect_char('[')
+ ps.skip_blank()
+
+ key = self.get_variant_key(ps)
+
+ ps.skip_blank()
+ ps.expect_char(']')
+
+ value = self.maybe_get_pattern(ps)
+ if value is None:
+ raise ParseError('E0012')
+
+ return ast.Variant(key, value, default_index)
+
+ def get_variants(self, ps: FluentParserStream) -> List[ast.Variant]:
+ variants: List[ast.Variant] = []
+ has_default = False
+
+ ps.skip_blank()
+ while ps.is_variant_start():
+ variant = self.get_variant(ps, has_default)
+
+ if variant.default:
+ has_default = True
+
+ variants.append(variant)
+ ps.expect_line_end()
+ ps.skip_blank()
+
+ if len(variants) == 0:
+ raise ParseError('E0011')
+
+ if not has_default:
+ raise ParseError('E0010')
+
+ return variants
+
+ def get_digits(self, ps: FluentParserStream) -> str:
+ num = ''
+
+ ch = ps.take_digit()
+ while ch:
+ num += ch
+ ch = ps.take_digit()
+
+ if len(num) == 0:
+ raise ParseError('E0004', '0-9')
+
+ return num
+
+ @with_span
+ def get_number(self, ps: FluentParserStream) -> ast.NumberLiteral:
+ num = ''
+
+ if ps.current_char == '-':
+ num += '-'
+ ps.next()
+
+ num += self.get_digits(ps)
+
+ if ps.current_char == '.':
+ num += '.'
+ ps.next()
+ num += self.get_digits(ps)
+
+ return ast.NumberLiteral(num)
+
+ def maybe_get_pattern(self, ps: FluentParserStream) -> Union[ast.Pattern, None]:
+ '''Parse an inline or a block Pattern, or None
+
+ maybe_get_pattern distinguishes between patterns which start on the
+ same line as the indentifier (aka inline singleline patterns and inline
+ multiline patterns), and patterns which start on a new line (aka block
+ patterns). The distinction is important for the dedentation logic: the
+ indent of the first line of a block pattern must be taken into account
+ when calculating the maximum common indent.
+ '''
+ ps.peek_blank_inline()
+ if ps.is_value_start():
+ ps.skip_to_peek()
+ return self.get_pattern(ps, is_block=False)
+
+ ps.peek_blank_block()
+ if ps.is_value_continuation():
+ ps.skip_to_peek()
+ return self.get_pattern(ps, is_block=True)
+
+ return None
+
+ @with_span
+ def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern:
+ elements: List[Any] = []
+ if is_block:
+ # A block pattern is a pattern which starts on a new line. Measure
+ # the indent of this first line for the dedentation logic.
+ blank_start = ps.index
+ first_indent = ps.skip_blank_inline()
+ elements.append(self.Indent(first_indent, blank_start, ps.index))
+ common_indent_length = len(first_indent)
+ else:
+ # Should get fixed by the subsequent min() operation
+ common_indent_length = cast(int, float('infinity'))
+
+ while ps.current_char:
+ if ps.current_char == EOL:
+ blank_start = ps.index
+ blank_lines = ps.peek_blank_block()
+ if ps.is_value_continuation():
+ ps.skip_to_peek()
+ indent = ps.skip_blank_inline()
+ common_indent_length = min(common_indent_length, len(indent))
+ elements.append(self.Indent(blank_lines + indent, blank_start, ps.index))
+ continue
+
+ # The end condition for get_pattern's while loop is a newline
+ # which is not followed by a valid pattern continuation.
+ ps.reset_peek()
+ break
+
+ if ps.current_char == '}':
+ raise ParseError('E0027')
+
+ element: Union[ast.TextElement, ast.Placeable]
+ if ps.current_char == '{':
+ element = self.get_placeable(ps)
+ else:
+ element = self.get_text_element(ps)
+
+ elements.append(element)
+
+ dedented = self.dedent(elements, common_indent_length)
+ return ast.Pattern(dedented)
+
+ class Indent(ast.SyntaxNode):
+ def __init__(self, value: str, start: int, end: int):
+ super(FluentParser.Indent, self).__init__()
+ self.value = value
+ self.add_span(start, end)
+
+ def dedent(self,
+ elements: List[Union[ast.TextElement, ast.Placeable, Indent]],
+ common_indent: int
+ ) -> List[Union[ast.TextElement, ast.Placeable]]:
+ '''Dedent a list of elements by removing the maximum common indent from
+ the beginning of text lines. The common indent is calculated in
+ get_pattern.
+ '''
+ trimmed: List[Union[ast.TextElement, ast.Placeable]] = []
+
+ for element in elements:
+ if isinstance(element, ast.Placeable):
+ trimmed.append(element)
+ continue
+
+ if isinstance(element, self.Indent):
+ # Strip the common indent.
+ element.value = element.value[:len(element.value) - common_indent]
+ if len(element.value) == 0:
+ continue
+
+ prev = trimmed[-1] if len(trimmed) > 0 else None
+ if isinstance(prev, ast.TextElement):
+ # Join adjacent TextElements by replacing them with their sum.
+ sum = ast.TextElement(prev.value + element.value)
+ if self.with_spans:
+ sum.add_span(cast(ast.Span, prev.span).start, cast(ast.Span, element.span).end)
+ trimmed[-1] = sum
+ continue
+
+ if isinstance(element, self.Indent):
+ # If the indent hasn't been merged into a preceding
+ # TextElements, convert it into a new TextElement.
+ text_element = ast.TextElement(element.value)
+ if self.with_spans:
+ text_element.add_span(cast(ast.Span, element.span).start, cast(ast.Span, element.span).end)
+ element = text_element
+
+ trimmed.append(element)
+
+ # Trim trailing whitespace from the Pattern.
+ last_element = trimmed[-1] if len(trimmed) > 0 else None
+ if isinstance(last_element, ast.TextElement):
+ last_element.value = last_element.value.rstrip(' \n\r')
+ if last_element.value == "":
+ trimmed.pop()
+
+ return trimmed
+
+ @with_span
+ def get_text_element(self, ps: FluentParserStream) -> ast.TextElement:
+ buf = ''
+
+ while ps.current_char:
+ ch = ps.current_char
+
+ if ch == '{' or ch == '}':
+ return ast.TextElement(buf)
+
+ if ch == EOL:
+ return ast.TextElement(buf)
+
+ buf += ch
+ ps.next()
+
+ return ast.TextElement(buf)
+
+ def get_escape_sequence(self, ps: FluentParserStream) -> str:
+ next = ps.current_char
+
+ if next == '\\' or next == '"':
+ ps.next()
+ return f'\\{next}'
+
+ if next == 'u':
+ return self.get_unicode_escape_sequence(ps, next, 4)
+
+ if next == 'U':
+ return self.get_unicode_escape_sequence(ps, next, 6)
+
+ raise ParseError('E0025', next)
+
+ def get_unicode_escape_sequence(self, ps: FluentParserStream, u: str, digits: int) -> str:
+ ps.expect_char(u)
+ sequence = ''
+ for _ in range(digits):
+ ch = ps.take_hex_digit()
+ if not ch:
+ raise ParseError('E0026', f'\\{u}{sequence}{ps.current_char}')
+ sequence += ch
+
+ return f'\\{u}{sequence}'
+
+ @with_span
+ def get_placeable(self, ps: FluentParserStream) -> ast.Placeable:
+ ps.expect_char('{')
+ ps.skip_blank()
+ expression = self.get_expression(ps)
+ ps.expect_char('}')
+ return ast.Placeable(expression)
+
+ @with_span
+ def get_expression(self, ps: FluentParserStream) -> Union[ast.InlineExpression,
+ ast.Placeable,
+ ast.SelectExpression]:
+ selector = self.get_inline_expression(ps)
+
+ ps.skip_blank()
+
+ if ps.current_char == '-':
+ if ps.peek() != '>':
+ ps.reset_peek()
+ return selector
+
+ if isinstance(selector, ast.MessageReference):
+ if selector.attribute is None:
+ raise ParseError('E0016')
+ else:
+ raise ParseError('E0018')
+
+ elif (
+ isinstance(selector, ast.TermReference)
+ ):
+ if selector.attribute is None:
+ raise ParseError('E0017')
+ elif not (
+ isinstance(selector, (
+ ast.StringLiteral,
+ ast.NumberLiteral,
+ ast.VariableReference,
+ ast.FunctionReference,
+ ))
+ ):
+ raise ParseError('E0029')
+
+ ps.next()
+ ps.next()
+
+ ps.skip_blank_inline()
+ ps.expect_line_end()
+
+ variants = self.get_variants(ps)
+ return ast.SelectExpression(selector, variants)
+
+ if (
+ isinstance(selector, ast.TermReference)
+ and selector.attribute is not None
+ ):
+ raise ParseError('E0019')
+
+ return selector
+
+ @with_span
+ def get_inline_expression(self, ps: FluentParserStream) -> Union[ast.InlineExpression, ast.Placeable]:
+ if ps.current_char == '{':
+ return self.get_placeable(ps)
+
+ if ps.is_number_start():
+ return self.get_number(ps)
+
+ if ps.current_char == '"':
+ return self.get_string(ps)
+
+ if ps.current_char == '$':
+ ps.next()
+ id = self.get_identifier(ps)
+ return ast.VariableReference(id)
+
+ if ps.current_char == '-':
+ ps.next()
+ id = self.get_identifier(ps)
+ attribute = None
+ if ps.current_char == '.':
+ ps.next()
+ attribute = self.get_identifier(ps)
+ arguments = None
+ ps.peek_blank()
+ if ps.current_peek == '(':
+ ps.skip_to_peek()
+ arguments = self.get_call_arguments(ps)
+ return ast.TermReference(id, attribute, arguments)
+
+ if ps.is_identifier_start():
+ id = self.get_identifier(ps)
+ ps.peek_blank()
+
+ if ps.current_peek == '(':
+ # It's a Function. Ensure it's all upper-case.
+ if not re.match('^[A-Z][A-Z0-9_-]*$', id.name):
+ raise ParseError('E0008')
+ ps.skip_to_peek()
+ args = self.get_call_arguments(ps)
+ return ast.FunctionReference(id, args)
+
+ attribute = None
+ if ps.current_char == '.':
+ ps.next()
+ attribute = self.get_identifier(ps)
+
+ return ast.MessageReference(id, attribute)
+
+ raise ParseError('E0028')
+
+ @with_span
+ def get_call_argument(self,
+ ps: FluentParserStream
+ ) -> Union[ast.InlineExpression, ast.NamedArgument, ast.Placeable]:
+ exp = self.get_inline_expression(ps)
+
+ ps.skip_blank()
+
+ if ps.current_char != ':':
+ return exp
+
+ if isinstance(exp, ast.MessageReference) and exp.attribute is None:
+ ps.next()
+ ps.skip_blank()
+
+ value = self.get_literal(ps)
+ return ast.NamedArgument(exp.id, value)
+
+ raise ParseError('E0009')
+
+ @with_span
+ def get_call_arguments(self, ps: FluentParserStream) -> ast.CallArguments:
+ positional: List[Union[ast.InlineExpression, ast.Placeable]] = []
+ named: List[ast.NamedArgument] = []
+ argument_names: Set[str] = set()
+
+ ps.expect_char('(')
+ ps.skip_blank()
+
+ while True:
+ if ps.current_char == ')':
+ break
+
+ arg = self.get_call_argument(ps)
+ if isinstance(arg, ast.NamedArgument):
+ if arg.name.name in argument_names:
+ raise ParseError('E0022')
+ named.append(arg)
+ argument_names.add(arg.name.name)
+ elif len(argument_names) > 0:
+ raise ParseError('E0021')
+ else:
+ positional.append(arg)
+
+ ps.skip_blank()
+
+ if ps.current_char == ',':
+ ps.next()
+ ps.skip_blank()
+ continue
+
+ break
+
+ ps.expect_char(')')
+ return ast.CallArguments(positional, named)
+
+ @with_span
+ def get_string(self, ps: FluentParserStream) -> ast.StringLiteral:
+ value = ''
+
+ ps.expect_char('"')
+
+ while True:
+ ch = ps.take_char(lambda x: x != '"' and x != EOL)
+ if not ch:
+ break
+ if ch == '\\':
+ value += self.get_escape_sequence(ps)
+ else:
+ value += ch
+
+ if ps.current_char == EOL:
+ raise ParseError('E0020')
+
+ ps.expect_char('"')
+
+ return ast.StringLiteral(value)
+
+ @with_span
+ def get_literal(self, ps: FluentParserStream) -> Union[ast.NumberLiteral, ast.StringLiteral]:
+ if ps.is_number_start():
+ return self.get_number(ps)
+ if ps.current_char == '"':
+ return self.get_string(ps)
+ raise ParseError('E0014')