from typing import Callable, Union

from typing_extensions import Literal

from .errors import ParseError


class ParserStream:
    """Character cursor over a string with a separate look-ahead offset.

    ``index`` is the committed position and ``peek_offset`` is the distance
    of the look-ahead from ``index``. ``next`` and ``peek`` step over a CRLF
    pair as if it were a single character.
    """

    def __init__(self, string: str):
        self.string = string
        self.index = 0
        self.peek_offset = 0

    def get(self, offset: int) -> Union[str, None]:
        """Return the raw character at ``offset``, or None on IndexError."""
        try:
            return self.string[offset]
        except IndexError:
            return None

    def char_at(self, offset: int) -> Union[str, None]:
        """Return the character at ``offset``, reporting CRLF as LF."""
        # When the cursor is at CRLF, return LF but don't move the cursor.
        # The cursor still points to the EOL position, which in this case is
        # the beginning of the compound CRLF sequence. This ensures slices of
        # [inclusive, exclusive) continue to work properly.
        at_crlf = self.get(offset) == '\r' and self.get(offset + 1) == '\n'
        if at_crlf:
            return '\n'
        return self.get(offset)

    @property
    def current_char(self) -> Union[str, None]:
        """Character at ``index`` (CRLF reported as LF), or None at the end."""
        return self.char_at(self.index)

    @property
    def current_peek(self) -> Union[str, None]:
        """Character at the look-ahead position (CRLF reported as LF)."""
        return self.char_at(self.index + self.peek_offset)

    def next(self) -> Union[str, None]:
        """Advance ``index`` by one character and drop any look-ahead.

        Returns the raw character (via ``get``) at the new position, so a CR
        that starts a CRLF pair is returned as ``'\\r'``.
        """
        self.peek_offset = 0
        # Skip over CRLF as if it was a single character.
        if self.get(self.index) == '\r' and self.get(self.index + 1) == '\n':
            self.index += 1
        self.index += 1
        return self.get(self.index)

    def peek(self) -> Union[str, None]:
        """Advance the look-ahead by one character without moving ``index``.

        Returns the raw character (via ``get``) at the new look-ahead position.
        """
        position = self.index + self.peek_offset
        # Skip over CRLF as if it was a single character.
        if self.get(position) == '\r' and self.get(position + 1) == '\n':
            self.peek_offset += 1
        self.peek_offset += 1
        return self.get(self.index + self.peek_offset)

    def reset_peek(self, offset: int = 0) -> None:
        """Set the look-ahead offset to ``offset`` (0 means "at ``index``")."""
        self.peek_offset = offset

    def skip_to_peek(self) -> None:
        """Commit the look-ahead: move ``index`` to the peeked position."""
        self.index += self.peek_offset
        self.peek_offset = 0


EOL = '\n'
EOF = None
SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*')


class FluentParserStream(ParserStream):
    """``ParserStream`` extended with Fluent-specific predicates and takers."""

    def peek_blank_inline(self) -> str:
        """Peek past a run of spaces and return the spaces that were seen."""
        start = self.index + self.peek_offset
        while self.current_peek == ' ':
            self.peek()
        return self.string[start:self.index + self.peek_offset]

    def skip_blank_inline(self) -> str:
        """Consume a run of spaces and return it."""
        blank = self.peek_blank_inline()
        self.skip_to_peek()
        return blank

    def peek_blank_block(self) -> str:
        """Peek past consecutive blank lines.

        Returns one ``EOL`` per blank line seen. When a line with content is
        reached the look-ahead is rewound to the start of that line; at EOF
        the look-ahead is left where it is.
        """
        blank_lines = 0
        while True:
            line_start = self.peek_offset
            self.peek_blank_inline()
            if self.current_peek == EOL:
                blank_lines += 1
                self.peek()
                continue
            if self.current_peek is not EOF:
                # Any other char; reset to column 1 on this line.
                self.reset_peek(line_start)
            # Otherwise: treat the blank line at EOF as a blank block.
            return EOL * blank_lines

    def skip_blank_block(self) -> str:
        """Consume consecutive blank lines and return one EOL per line."""
        blank = self.peek_blank_block()
        self.skip_to_peek()
        return blank

    def peek_blank(self) -> None:
        """Peek past any mix of spaces and line ends."""
        while self.current_peek in (" ", EOL):
            self.peek()

    def skip_blank(self) -> None:
        """Consume any mix of spaces and line ends."""
        self.peek_blank()
        self.skip_to_peek()

    def expect_char(self, ch: str) -> Literal[True]:
        """Consume ``ch`` if it is the current character.

        Raises:
            ParseError: code ``E0003`` with ``ch`` when the current character
                differs.
        """
        if self.current_char != ch:
            raise ParseError('E0003', ch)
        self.next()
        return True

    def expect_line_end(self) -> Literal[True]:
        """Consume a line end; EOF is accepted without consuming anything.

        Raises:
            ParseError: code ``E0003`` when the current character is neither
                EOL nor EOF.
        """
        if self.current_char is EOF:
            # EOF is a valid line end in Fluent.
            return True
        if self.current_char == EOL:
            self.next()
            return True
        # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424)
        raise ParseError('E0003', '\u2424')

    def take_char(
        self, f: Callable[[str], bool]
    ) -> Union[str, Literal[False], None]:
        """Consume and return the current character if ``f`` accepts it.

        Returns ``EOF`` (None) at the end of input and ``False`` when ``f``
        rejects the character; nothing is consumed in either case.
        """
        ch = self.current_char
        if ch is None:
            return EOF
        if not f(ch):
            return False
        self.next()
        return ch

    def is_char_id_start(self, ch: Union[str, None]) -> bool:
        """Return True when ``ch`` is an ASCII letter (a-z or A-Z)."""
        if ch is None:
            return False
        cc = ord(ch)
        return 97 <= cc <= 122 or 65 <= cc <= 90

    def is_identifier_start(self) -> bool:
        """Return True when the peeked character can start an identifier."""
        return self.is_char_id_start(self.current_peek)

    def is_number_start(self) -> bool:
        """Return True when a digit, or ``-`` followed by a digit, is next.

        The look-ahead offset is reset to 0 before returning.
        """
        if self.current_char == '-':
            ch = self.peek()
        else:
            ch = self.current_char
        self.reset_peek()
        if ch is None:
            return False
        return 48 <= ord(ch) <= 57

    def is_char_pattern_continuation(self, ch: Union[str, None]) -> bool:
        """Return True unless ``ch`` is EOF or a special line-start char."""
        if ch is EOF:
            return False
        return ch not in SPECIAL_LINE_START_CHARS

    def is_value_start(self) -> bool:
        """Return True when the peeked character is neither EOF nor EOL."""
        # Inline Patterns may start with any char.
        return self.current_peek is not EOF and self.current_peek != EOL

    def is_value_continuation(self) -> bool:
        """Return True when the peeked line continues a pattern value.

        On True the look-ahead is rewound to where it was on entry. On False
        the look-ahead is left after any leading spaces that were peeked.
        """
        column1 = self.peek_offset
        self.peek_blank_inline()

        if self.current_peek == '{':
            self.reset_peek(column1)
            return True

        # No indentation at all: not a continuation.
        if self.peek_offset == column1:
            return False

        if self.is_char_pattern_continuation(self.current_peek):
            self.reset_peek(column1)
            return True

        return False

    # -1 - any
    #  0 - comment
    #  1 - group comment
    #  2 - resource comment
    def is_next_line_comment(self, level: int = -1) -> bool:
        """Return True when the line after the peeked EOL is a comment line.

        For ``level >= 0`` the next line must start with exactly ``level + 1``
        ``#`` characters. For ``level == -1`` ("any") it must start with one,
        two or three ``#`` characters. In both cases the hashes must be
        followed by a space or EOL.

        Returns False immediately, without touching the look-ahead, when the
        peeked character is not EOL. Otherwise the look-ahead offset is reset
        to 0 before returning.
        """
        if self.current_peek != EOL:
            return False

        any_level = level == -1
        max_hashes = 3 if any_level else level + 1

        # Count leading hashes, capped at max_hashes. `ch` always ends up
        # holding the first character after the counted hashes. The previous
        # implementation peeked once more after breaking out of the loop and
        # so tested the wrong character in "any" mode.
        hashes = 0
        ch = self.peek()
        while hashes < max_hashes and ch == '#':
            hashes += 1
            ch = self.peek()

        self.reset_peek()

        if any_level:
            if hashes == 0:
                return False
        elif hashes < max_hashes:
            return False

        # The first char after #, ## or ###.
        return ch in (' ', EOL)

    def is_variant_start(self) -> bool:
        """Return True when ``[`` or ``*[`` is next and is not ``[[``.

        The look-ahead is restored to its entry offset before returning.
        """
        entry_offset = self.peek_offset
        if self.current_peek == '*':
            self.peek()
        found = self.current_peek == '[' and self.peek() != '['
        self.reset_peek(entry_offset)
        return found

    def is_attribute_start(self) -> bool:
        """Return True when the peeked character is ``.``."""
        return self.current_peek == '.'

    def skip_to_next_entry_start(self, junk_start: int) -> None:
        """Advance ``index`` to the start of the next line that looks like an entry.

        Stops at a line whose first character is an ASCII letter, ``-`` or
        ``#``, or whose first two characters are ``//`` or ``[[``; otherwise
        runs to the end of input.
        """
        last_newline = self.string.rfind(EOL, 0, self.index)
        if junk_start < last_newline:
            # Last seen newline is _after_ the junk start. It's safe to rewind
            # without the risk of resuming at the same broken entry.
            self.index = last_newline

        while self.current_char:
            # We're only interested in beginnings of line.
            if self.current_char != EOL:
                self.next()
                continue

            # Break if the first char in this line looks like an entry start.
            first = self.next()
            if self.is_char_id_start(first) or first in ('-', '#'):
                break

            # Syntax 0.4 compatibility
            second = self.peek()
            self.reset_peek()
            if (first, second) in (('/', '/'), ('[', '[')):
                break

    def take_id_start(self) -> Union[str, None]:
        """Consume and return an ASCII letter.

        Raises:
            ParseError: code ``E0004`` when the current character is not an
                ASCII letter.
        """
        if not self.is_char_id_start(self.current_char):
            raise ParseError('E0004', 'a-zA-Z')
        ret = self.current_char
        self.next()
        return ret

    def take_id_char(self) -> Union[str, Literal[False], None]:
        """Consume one of ``a-z``, ``A-Z``, ``0-9``, ``_`` or ``-``.

        Return values follow ``take_char``.
        """
        def is_id_char(ch: str) -> bool:
            cc = ord(ch)
            return (
                97 <= cc <= 122      # a-z
                or 65 <= cc <= 90    # A-Z
                or 48 <= cc <= 57    # 0-9
                or cc == 95          # _
                or cc == 45          # -
            )

        return self.take_char(is_id_char)

    def take_digit(self) -> Union[str, Literal[False], None]:
        """Consume one of ``0-9``. Return values follow ``take_char``."""
        def is_digit(ch: str) -> bool:
            return 48 <= ord(ch) <= 57

        return self.take_char(is_digit)

    def take_hex_digit(self) -> Union[str, Literal[False], None]:
        """Consume one of ``0-9``, ``A-F`` or ``a-f``.

        Return values follow ``take_char``.
        """
        def is_hex_digit(ch: str) -> bool:
            cc = ord(ch)
            return (
                48 <= cc <= 57       # 0-9
                or 65 <= cc <= 70    # A-F
                or 97 <= cc <= 102   # a-f
            )

        return self.take_char(is_hex_digit)