summaryrefslogtreecommitdiffstats
path: root/third_party/python/fluent.syntax/fluent/syntax/stream.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/fluent.syntax/fluent/syntax/stream.py')
-rw-r--r--third_party/python/fluent.syntax/fluent/syntax/stream.py283
1 files changed, 283 insertions, 0 deletions
diff --git a/third_party/python/fluent.syntax/fluent/syntax/stream.py b/third_party/python/fluent.syntax/fluent/syntax/stream.py
new file mode 100644
index 0000000000..150ac933ca
--- /dev/null
+++ b/third_party/python/fluent.syntax/fluent/syntax/stream.py
@@ -0,0 +1,283 @@
+from typing import Callable, Union
+from typing_extensions import Literal
+from .errors import ParseError
+
+
+class ParserStream:
+ def __init__(self, string: str):
+ self.string = string
+ self.index = 0
+ self.peek_offset = 0
+
+ def get(self, offset: int) -> Union[str, None]:
+ try:
+ return self.string[offset]
+ except IndexError:
+ return None
+
+ def char_at(self, offset: int) -> Union[str, None]:
+ # When the cursor is at CRLF, return LF but don't move the cursor. The
+ # cursor still points to the EOL position, which in this case is the
+ # beginning of the compound CRLF sequence. This ensures slices of
+ # [inclusive, exclusive) continue to work properly.
+ if self.get(offset) == '\r' \
+ and self.get(offset + 1) == '\n':
+ return '\n'
+
+ return self.get(offset)
+
+ @property
+ def current_char(self) -> Union[str, None]:
+ return self.char_at(self.index)
+
+ @property
+ def current_peek(self) -> Union[str, None]:
+ return self.char_at(self.index + self.peek_offset)
+
+ def next(self) -> Union[str, None]:
+ self.peek_offset = 0
+ # Skip over CRLF as if it was a single character.
+ if self.get(self.index) == '\r' \
+ and self.get(self.index + 1) == '\n':
+ self.index += 1
+ self.index += 1
+ return self.get(self.index)
+
+ def peek(self) -> Union[str, None]:
+ # Skip over CRLF as if it was a single character.
+ if self.get(self.index + self.peek_offset) == '\r' \
+ and self.get(self.index + self.peek_offset + 1) == '\n':
+ self.peek_offset += 1
+ self.peek_offset += 1
+ return self.get(self.index + self.peek_offset)
+
+ def reset_peek(self, offset: int = 0) -> None:
+ self.peek_offset = offset
+
+ def skip_to_peek(self) -> None:
+ self.index += self.peek_offset
+ self.peek_offset = 0
+
+
+EOL = '\n'
+EOF = None
+SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')
+
+
+class FluentParserStream(ParserStream):
+
+ def peek_blank_inline(self) -> str:
+ start = self.index + self.peek_offset
+ while self.current_peek == ' ':
+ self.peek()
+ return self.string[start:self.index + self.peek_offset]
+
+ def skip_blank_inline(self) -> str:
+ blank = self.peek_blank_inline()
+ self.skip_to_peek()
+ return blank
+
+ def peek_blank_block(self) -> str:
+ blank = ""
+ while True:
+ line_start = self.peek_offset
+ self.peek_blank_inline()
+
+ if self.current_peek == EOL:
+ blank += EOL
+ self.peek()
+ continue
+
+ if self.current_peek is EOF:
+ # Treat the blank line at EOF as a blank block.
+ return blank
+
+ # Any other char; reset to column 1 on this line.
+ self.reset_peek(line_start)
+ return blank
+
+ def skip_blank_block(self) -> str:
+ blank = self.peek_blank_block()
+ self.skip_to_peek()
+ return blank
+
+ def peek_blank(self) -> None:
+ while self.current_peek in (" ", EOL):
+ self.peek()
+
+ def skip_blank(self) -> None:
+ self.peek_blank()
+ self.skip_to_peek()
+
+ def expect_char(self, ch: str) -> Literal[True]:
+ if self.current_char == ch:
+ self.next()
+ return True
+
+ raise ParseError('E0003', ch)
+
+ def expect_line_end(self) -> Literal[True]:
+ if self.current_char is EOF:
+ # EOF is a valid line end in Fluent.
+ return True
+
+ if self.current_char == EOL:
+ self.next()
+ return True
+
+ # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
+ raise ParseError('E0003', '\u2424')
+
+ def take_char(self, f: Callable[[str], bool]) -> Union[str, Literal[False], None]:
+ ch = self.current_char
+ if ch is None:
+ return EOF
+ if f(ch):
+ self.next()
+ return ch
+ return False
+
+ def is_char_id_start(self, ch: Union[str, None]) -> bool:
+ if ch is None:
+ return False
+
+ cc = ord(ch)
+ return (cc >= 97 and cc <= 122) or \
+ (cc >= 65 and cc <= 90)
+
+ def is_identifier_start(self) -> bool:
+ return self.is_char_id_start(self.current_peek)
+
+ def is_number_start(self) -> bool:
+ ch = self.peek() if self.current_char == '-' else self.current_char
+ if ch is None:
+ self.reset_peek()
+ return False
+
+ cc = ord(ch)
+ is_digit = cc >= 48 and cc <= 57
+ self.reset_peek()
+ return is_digit
+
+ def is_char_pattern_continuation(self, ch: Union[str, None]) -> bool:
+ if ch is EOF:
+ return False
+
+ return ch not in SPECIAL_LINE_START_CHARS
+
+ def is_value_start(self) -> bool:
+ # Inline Patterns may start with any char.
+ return self.current_peek is not EOF and self.current_peek != EOL
+
+ def is_value_continuation(self) -> bool:
+ column1 = self.peek_offset
+ self.peek_blank_inline()
+
+ if self.current_peek == '{':
+ self.reset_peek(column1)
+ return True
+
+ if self.peek_offset - column1 == 0:
+ return False
+
+ if self.is_char_pattern_continuation(self.current_peek):
+ self.reset_peek(column1)
+ return True
+
+ return False
+
+ # -1 - any
+ # 0 - comment
+ # 1 - group comment
+ # 2 - resource comment
+ def is_next_line_comment(self, level: int = -1) -> bool:
+ if self.current_peek != EOL:
+ return False
+
+ i = 0
+
+ while (i <= level or (level == -1 and i < 3)):
+ if self.peek() != '#':
+ if i <= level and level != -1:
+ self.reset_peek()
+ return False
+ break
+ i += 1
+
+ # The first char after #, ## or ###.
+ if self.peek() in (' ', EOL):
+ self.reset_peek()
+ return True
+
+ self.reset_peek()
+ return False
+
+ def is_variant_start(self) -> bool:
+ current_peek_offset = self.peek_offset
+ if self.current_peek == '*':
+ self.peek()
+ if self.current_peek == '[' and self.peek() != '[':
+ self.reset_peek(current_peek_offset)
+ return True
+
+ self.reset_peek(current_peek_offset)
+ return False
+
+ def is_attribute_start(self) -> bool:
+ return self.current_peek == '.'
+
+ def skip_to_next_entry_start(self, junk_start: int) -> None:
+ last_newline = self.string.rfind(EOL, 0, self.index)
+ if junk_start < last_newline:
+ # Last seen newline is _after_ the junk start. It's safe to rewind
+ # without the risk of resuming at the same broken entry.
+ self.index = last_newline
+
+ while self.current_char:
+ # We're only interested in beginnings of line.
+ if self.current_char != EOL:
+ self.next()
+ continue
+
+ # Break if the first char in this line looks like an entry start.
+ first = self.next()
+ if self.is_char_id_start(first) or first == '-' or first == '#':
+ break
+
+ # Syntax 0.4 compatibility
+ peek = self.peek()
+ self.reset_peek()
+ if (first, peek) == ('/', '/') or (first, peek) == ('[', '['):
+ break
+
+ def take_id_start(self) -> Union[str, None]:
+ if self.is_char_id_start(self.current_char):
+ ret = self.current_char
+ self.next()
+ return ret
+
+ raise ParseError('E0004', 'a-zA-Z')
+
+ def take_id_char(self) -> Union[str, Literal[False], None]:
+ def closure(ch: str) -> bool:
+ cc = ord(ch)
+ return ((cc >= 97 and cc <= 122) or
+ (cc >= 65 and cc <= 90) or
+ (cc >= 48 and cc <= 57) or
+ cc == 95 or cc == 45)
+ return self.take_char(closure)
+
+ def take_digit(self) -> Union[str, Literal[False], None]:
+ def closure(ch: str) -> bool:
+ cc = ord(ch)
+ return (cc >= 48 and cc <= 57)
+ return self.take_char(closure)
+
+ def take_hex_digit(self) -> Union[str, Literal[False], None]:
+ def closure(ch: str) -> bool:
+ cc = ord(ch)
+ return (
+ (cc >= 48 and cc <= 57) # 0-9
+ or (cc >= 65 and cc <= 70) # A-F
+ or (cc >= 97 and cc <= 102)) # a-f
+ return self.take_char(closure)