diff options
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r-- | sqlglot/tokens.py | 45 |
1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index d278dbf..83b97d6 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -48,6 +48,7 @@ class TokenType(AutoName): HASH_ARROW = auto() DHASH_ARROW = auto() LR_ARROW = auto() + DAT = auto() LT_AT = auto() AT_GT = auto() DOLLAR = auto() @@ -84,6 +85,7 @@ class TokenType(AutoName): UTINYINT = auto() SMALLINT = auto() USMALLINT = auto() + MEDIUMINT = auto() INT = auto() UINT = auto() BIGINT = auto() @@ -140,6 +142,7 @@ class TokenType(AutoName): SMALLSERIAL = auto() BIGSERIAL = auto() XML = auto() + YEAR = auto() UNIQUEIDENTIFIER = auto() USERDEFINED = auto() MONEY = auto() @@ -157,6 +160,7 @@ class TokenType(AutoName): FIXEDSTRING = auto() LOWCARDINALITY = auto() NESTED = auto() + UNKNOWN = auto() # keywords ALIAS = auto() @@ -180,6 +184,7 @@ class TokenType(AutoName): COMMAND = auto() COMMENT = auto() COMMIT = auto() + CONNECT_BY = auto() CONSTRAINT = auto() CREATE = auto() CROSS = auto() @@ -256,6 +261,7 @@ class TokenType(AutoName): NEXT = auto() NOTNULL = auto() NULL = auto() + OBJECT_IDENTIFIER = auto() OFFSET = auto() ON = auto() ORDER_BY = auto() @@ -298,6 +304,7 @@ class TokenType(AutoName): SIMILAR_TO = auto() SOME = auto() SORT_BY = auto() + START_WITH = auto() STRUCT = auto() TABLE_SAMPLE = auto() TEMPORARY = auto() @@ -319,6 +326,8 @@ class TokenType(AutoName): WINDOW = auto() WITH = auto() UNIQUE = auto() + VERSION_SNAPSHOT = auto() + TIMESTAMP_SNAPSHOT = auto() class Token: @@ -530,6 +539,7 @@ class Tokenizer(metaclass=_Tokenizer): "COLLATE": TokenType.COLLATE, "COLUMN": TokenType.COLUMN, "COMMIT": TokenType.COMMIT, + "CONNECT BY": TokenType.CONNECT_BY, "CONSTRAINT": TokenType.CONSTRAINT, "CREATE": TokenType.CREATE, "CROSS": TokenType.CROSS, @@ -636,6 +646,7 @@ class Tokenizer(metaclass=_Tokenizer): "SIMILAR TO": TokenType.SIMILAR_TO, "SOME": TokenType.SOME, "SORT BY": TokenType.SORT_BY, + "START WITH": TokenType.START_WITH, "TABLE": TokenType.TABLE, "TABLESAMPLE": TokenType.TABLE_SAMPLE, "TEMP": 
TokenType.TEMPORARY, @@ -643,6 +654,7 @@ class Tokenizer(metaclass=_Tokenizer): "THEN": TokenType.THEN, "TRUE": TokenType.TRUE, "UNION": TokenType.UNION, + "UNKNOWN": TokenType.UNKNOWN, "UNNEST": TokenType.UNNEST, "UNPIVOT": TokenType.UNPIVOT, "UPDATE": TokenType.UPDATE, @@ -739,6 +751,8 @@ class Tokenizer(metaclass=_Tokenizer): "TRUNCATE": TokenType.COMMAND, "VACUUM": TokenType.COMMAND, "USER-DEFINED": TokenType.USERDEFINED, + "FOR VERSION": TokenType.VERSION_SNAPSHOT, + "FOR TIMESTAMP": TokenType.TIMESTAMP_SNAPSHOT, } WHITE_SPACE: t.Dict[t.Optional[str], TokenType] = { @@ -941,8 +955,8 @@ class Tokenizer(metaclass=_Tokenizer): if result == TrieResult.EXISTS: word = chars + end = self._current + size size += 1 - end = self._current - 1 + size if end < self.size: char = self.sql[end] @@ -961,21 +975,20 @@ class Tokenizer(metaclass=_Tokenizer): char = "" chars = " " - if not word: - if self._char in self.SINGLE_TOKENS: - self._add(self.SINGLE_TOKENS[self._char], text=self._char) + if word: + if self._scan_string(word): return - self._scan_var() - return - - if self._scan_string(word): - return - if self._scan_comment(word): + if self._scan_comment(word): + return + if prev_space or single_token or not char: + self._advance(size - 1) + word = word.upper() + self._add(self.KEYWORDS[word], text=word) + return + if self._char in self.SINGLE_TOKENS: + self._add(self.SINGLE_TOKENS[self._char], text=self._char) return - - self._advance(size - 1) - word = word.upper() - self._add(self.KEYWORDS[word], text=word) + self._scan_var() def _scan_comment(self, comment_start: str) -> bool: if comment_start not in self._COMMENTS: @@ -1053,8 +1066,8 @@ class Tokenizer(metaclass=_Tokenizer): elif self.IDENTIFIERS_CAN_START_WITH_DIGIT: return self._add(TokenType.VAR) - self._add(TokenType.NUMBER, number_text) - return self._advance(-len(literal)) + self._advance(-len(literal)) + return self._add(TokenType.NUMBER, number_text) else: return self._add(TokenType.NUMBER) |