author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-02-27 10:46:33 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-02-27 10:46:33 +0000
commit     aa1d1f1ea72887a3efb78f4950e27bc79dfa3766 (patch)
tree       a9f5bf5ab14c69d011e21d40f8504c4f94a8de72 /sqlglot/tokens.py
parent     Adding upstream version 11.2.0. (diff)
download   sqlglot-e6e780e941899ee9d0bcfd34b7f8fb39ef2a3d6d.tar.xz
           sqlglot-e6e780e941899ee9d0bcfd34b7f8fb39ef2a3d6d.zip
Adding upstream version 11.2.3. (tag: upstream/11.2.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r--  sqlglot/tokens.py | 53
1 file changed, 26 insertions, 27 deletions
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 9b29c12..f3f1a70 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -115,6 +115,7 @@ class TokenType(AutoName):
     IMAGE = auto()
     VARIANT = auto()
     OBJECT = auto()
+    INET = auto()
 
     # keywords
     ALIAS = auto()
@@ -437,16 +438,8 @@ class Tokenizer(metaclass=_Tokenizer):
     _IDENTIFIER_ESCAPES: t.Set[str] = set()
 
     KEYWORDS = {
-        **{
-            f"{key}{postfix}": TokenType.BLOCK_START
-            for key in ("{%", "{#")
-            for postfix in ("", "+", "-")
-        },
-        **{
-            f"{prefix}{key}": TokenType.BLOCK_END
-            for key in ("%}", "#}")
-            for prefix in ("", "+", "-")
-        },
+        **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
+        **{f"{prefix}%}}": TokenType.BLOCK_END for prefix in ("", "+", "-")},
         "{{+": TokenType.BLOCK_START,
         "{{-": TokenType.BLOCK_START,
         "+}}": TokenType.BLOCK_END,
@@ -533,6 +526,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "IGNORE NULLS": TokenType.IGNORE_NULLS,
         "IN": TokenType.IN,
         "INDEX": TokenType.INDEX,
+        "INET": TokenType.INET,
         "INNER": TokenType.INNER,
         "INSERT": TokenType.INSERT,
         "INTERVAL": TokenType.INTERVAL,
@@ -701,7 +695,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "VACUUM": TokenType.COMMAND,
     }
 
-    WHITE_SPACE = {
+    WHITE_SPACE: t.Dict[str, TokenType] = {
         " ": TokenType.SPACE,
         "\t": TokenType.SPACE,
         "\n": TokenType.BREAK,
@@ -723,7 +717,7 @@ class Tokenizer(metaclass=_Tokenizer):
     NUMERIC_LITERALS: t.Dict[str, str] = {}
     ENCODE: t.Optional[str] = None
 
-    COMMENTS = ["--", ("/*", "*/")]
+    COMMENTS = ["--", ("/*", "*/"), ("{#", "#}")]
     KEYWORD_TRIE = None  # autofilled
 
     IDENTIFIER_CAN_START_WITH_DIGIT = False
@@ -778,22 +772,16 @@ class Tokenizer(metaclass=_Tokenizer):
             self._start = self._current
             self._advance()
 
-            if not self._char:
+            if self._char is None:
                 break
 
-            white_space = self.WHITE_SPACE.get(self._char)  # type: ignore
-            identifier_end = self._IDENTIFIERS.get(self._char)  # type: ignore
-
-            if white_space:
-                if white_space == TokenType.BREAK:
-                    self._col = 1
-                    self._line += 1
-            elif self._char.isdigit():  # type:ignore
-                self._scan_number()
-            elif identifier_end:
-                self._scan_identifier(identifier_end)
-            else:
-                self._scan_keywords()
+            if self._char not in self.WHITE_SPACE:
+                if self._char.isdigit():
+                    self._scan_number()
+                elif self._char in self._IDENTIFIERS:
+                    self._scan_identifier(self._IDENTIFIERS[self._char])
+                else:
+                    self._scan_keywords()
 
             if until and until():
                 break
@@ -807,13 +795,23 @@ class Tokenizer(metaclass=_Tokenizer):
             return self.sql[start:end]
         return ""
 
+    def _line_break(self, char: t.Optional[str]) -> bool:
+        return self.WHITE_SPACE.get(char) == TokenType.BREAK  # type: ignore
+
     def _advance(self, i: int = 1) -> None:
+        if self._line_break(self._char):
+            self._set_new_line()
+
         self._col += i
         self._current += i
         self._end = self._current >= self.size  # type: ignore
         self._char = self.sql[self._current - 1]  # type: ignore
         self._peek = self.sql[self._current] if self._current < self.size else ""  # type: ignore
 
+    def _set_new_line(self) -> None:
+        self._col = 1
+        self._line += 1
+
     @property
     def _text(self) -> str:
         return self.sql[self._start : self._current]
@@ -917,7 +915,7 @@ class Tokenizer(metaclass=_Tokenizer):
                 self._comments.append(self._text[comment_start_size : -comment_end_size + 1])  # type: ignore
                 self._advance(comment_end_size - 1)
             else:
-                while not self._end and self.WHITE_SPACE.get(self._peek) != TokenType.BREAK:  # type: ignore
+                while not self._end and not self._line_break(self._peek):
                     self._advance()
                 self._comments.append(self._text[comment_start_size:])  # type: ignore
@@ -926,6 +924,7 @@ class Tokenizer(metaclass=_Tokenizer):
         if comment_start_line == self._prev_token_line:
             self.tokens[-1].comments.extend(self._comments)
             self._comments = []
+            self._prev_token_line = self._line
 
         return True
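The first pair of hunks adds TokenType.INET plus a matching KEYWORDS entry, so the Postgres-style INET network-address type is recognized as a first-class token rather than falling through to a generic identifier. A minimal sketch of the observable effect, assuming the sqlglot 11.2.x public API (Tokenizer.tokenize returning Token objects with a token_type attribute):

from sqlglot.tokens import Tokenizer, TokenType

# After this commit, "INET" hits the keyword table directly.
tokens = Tokenizer().tokenize("CAST(addr AS INET)")
assert any(t.token_type == TokenType.INET for t in tokens)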
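The KEYWORDS and COMMENTS hunks work together: the "{#"/"#}" pairs are dropped from the BLOCK_START/BLOCK_END keyword comprehensions and instead registered as a comment delimiter pair, so Jinja-style comments are consumed by the comment scanner and attached to the neighboring token. A hedged sketch of the new behavior (the example SQL and printed fields are illustrative, not from the commit):

from sqlglot.tokens import Tokenizer, TokenType

# "{# ... #}" should now surface as a token comment, not as
# BLOCK_START / BLOCK_END tokens.
tokens = Tokenizer().tokenize("SELECT 1 {# templating note #}")
assert not any(
    t.token_type in (TokenType.BLOCK_START, TokenType.BLOCK_END) for t in tokens
)
print([(t.token_type, t.text, t.comments) for t in tokens])

This also explains the final hunk: after a comment is folded into the previous token, _prev_token_line is advanced so a second comment on the same line is attached consistently.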
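The remaining hunks move line/column bookkeeping out of the main scan loop: instead of special-casing TokenType.BREAK when a whitespace character is seen, _advance() itself calls the new _line_break/_set_new_line helpers on every step, so positions stay correct no matter where the newline is consumed (including inside comment scanning, which now reuses _line_break). A quick hypothetical check, again assuming the 11.2.x Token attributes:

from sqlglot.tokens import Tokenizer

# Tokens after the "\n" should report line 2, since _advance() now
# updates _line/_col whenever it steps over a break character.
for token in Tokenizer().tokenize("SELECT 1\nFROM t"):
    print(token.line, token.col, token.text)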