Diffstat (limited to 'sqlglot/tokens.py')
 sqlglot/tokens.py | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index d278dbf..83b97d6 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -48,6 +48,7 @@ class TokenType(AutoName):
     HASH_ARROW = auto()
     DHASH_ARROW = auto()
     LR_ARROW = auto()
+    DAT = auto()
     LT_AT = auto()
     AT_GT = auto()
     DOLLAR = auto()
@@ -84,6 +85,7 @@ class TokenType(AutoName):
     UTINYINT = auto()
     SMALLINT = auto()
     USMALLINT = auto()
+    MEDIUMINT = auto()
     INT = auto()
     UINT = auto()
     BIGINT = auto()
@@ -140,6 +142,7 @@ class TokenType(AutoName):
     SMALLSERIAL = auto()
     BIGSERIAL = auto()
     XML = auto()
+    YEAR = auto()
     UNIQUEIDENTIFIER = auto()
     USERDEFINED = auto()
     MONEY = auto()
@@ -157,6 +160,7 @@ class TokenType(AutoName):
     FIXEDSTRING = auto()
     LOWCARDINALITY = auto()
     NESTED = auto()
+    UNKNOWN = auto()
 
     # keywords
     ALIAS = auto()
@@ -180,6 +184,7 @@ class TokenType(AutoName):
     COMMAND = auto()
     COMMENT = auto()
     COMMIT = auto()
+    CONNECT_BY = auto()
     CONSTRAINT = auto()
     CREATE = auto()
     CROSS = auto()
@@ -256,6 +261,7 @@ class TokenType(AutoName):
     NEXT = auto()
     NOTNULL = auto()
     NULL = auto()
+    OBJECT_IDENTIFIER = auto()
     OFFSET = auto()
     ON = auto()
     ORDER_BY = auto()
@@ -298,6 +304,7 @@ class TokenType(AutoName):
     SIMILAR_TO = auto()
     SOME = auto()
     SORT_BY = auto()
+    START_WITH = auto()
     STRUCT = auto()
     TABLE_SAMPLE = auto()
     TEMPORARY = auto()
@@ -319,6 +326,8 @@ class TokenType(AutoName):
     WINDOW = auto()
     WITH = auto()
     UNIQUE = auto()
+    VERSION_SNAPSHOT = auto()
+    TIMESTAMP_SNAPSHOT = auto()
 
 
 class Token:
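The two snapshot token types pair with the `FOR VERSION` and `FOR TIMESTAMP` keyword entries added further down, giving table time-travel clauses a dedicated token. A minimal sketch of the observable behavior, assuming this revision of sqlglot is importable; the query text is illustrative, not taken from the diff:

```python
# Minimal sketch, assuming this revision of sqlglot is importable.
# "FOR VERSION" is a multi-word KEYWORDS entry, so the trie-based scanner
# emits it as a single VERSION_SNAPSHOT token.
from sqlglot.tokens import Tokenizer, TokenType

tokens = Tokenizer().tokenize("SELECT * FROM t FOR VERSION AS OF 123")
assert TokenType.VERSION_SNAPSHOT in {tok.token_type for tok in tokens}
```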
@@ -530,6 +539,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "COLLATE": TokenType.COLLATE,
         "COLUMN": TokenType.COLUMN,
         "COMMIT": TokenType.COMMIT,
+        "CONNECT BY": TokenType.CONNECT_BY,
         "CONSTRAINT": TokenType.CONSTRAINT,
         "CREATE": TokenType.CREATE,
         "CROSS": TokenType.CROSS,
@@ -636,6 +646,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "SIMILAR TO": TokenType.SIMILAR_TO,
         "SOME": TokenType.SOME,
         "SORT BY": TokenType.SORT_BY,
+        "START WITH": TokenType.START_WITH,
         "TABLE": TokenType.TABLE,
         "TABLESAMPLE": TokenType.TABLE_SAMPLE,
         "TEMP": TokenType.TEMPORARY,
@@ -643,6 +654,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "THEN": TokenType.THEN,
         "TRUE": TokenType.TRUE,
         "UNION": TokenType.UNION,
+        "UNKNOWN": TokenType.UNKNOWN,
         "UNNEST": TokenType.UNNEST,
         "UNPIVOT": TokenType.UNPIVOT,
         "UPDATE": TokenType.UPDATE,
@@ -739,6 +751,8 @@ class Tokenizer(metaclass=_Tokenizer):
         "TRUNCATE": TokenType.COMMAND,
         "VACUUM": TokenType.COMMAND,
         "USER-DEFINED": TokenType.USERDEFINED,
+        "FOR VERSION": TokenType.VERSION_SNAPSHOT,
+        "FOR TIMESTAMP": TokenType.TIMESTAMP_SNAPSHOT,
     }
 
     WHITE_SPACE: t.Dict[t.Optional[str], TokenType] = {
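The entries above register `CONNECT BY` and `START WITH` (alongside `FOR VERSION` / `FOR TIMESTAMP`) as multi-word keywords, so Oracle-style hierarchical queries tokenize them as single units. A minimal sketch, assuming this revision of sqlglot is importable; table and column names are illustrative:

```python
# Minimal sketch, assuming this revision of sqlglot is importable.
# Multi-word entries such as "START WITH" and "CONNECT BY" are resolved
# through the keyword trie and arrive as one token each, not two.
from sqlglot.tokens import Tokenizer, TokenType

sql = "SELECT id FROM emp START WITH mgr IS NULL CONNECT BY PRIOR id = mgr"
types = {tok.token_type for tok in Tokenizer().tokenize(sql)}
assert TokenType.START_WITH in types and TokenType.CONNECT_BY in types
```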
@@ -941,8 +955,8 @@ class Tokenizer(metaclass=_Tokenizer):
             if result == TrieResult.EXISTS:
                 word = chars
 
+            end = self._current + size
             size += 1
-            end = self._current - 1 + size
 
             if end < self.size:
                 char = self.sql[end]
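The reorder above is value-preserving: `end` computed before the increment is `self._current + size`, exactly what `self._current - 1 + size` evaluated to after the old `size += 1`; the change only simplifies the bookkeeping. A quick check of that arithmetic, with illustrative values:

```python
# New order: compute end first, then increment size.
current, size = 10, 3
end_new = current + size

# Old order: increment size first, then compute end with the -1 offset.
size += 1
end_old = current - 1 + size

assert end_new == end_old == 13
```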
@@ -961,21 +975,20 @@ class Tokenizer(metaclass=_Tokenizer):
                 char = ""
                 chars = " "
 
-        if not word:
-            if self._char in self.SINGLE_TOKENS:
-                self._add(self.SINGLE_TOKENS[self._char], text=self._char)
+        if word:
+            if self._scan_string(word):
                 return
-            self._scan_var()
-            return
-
-        if self._scan_string(word):
-            return
-        if self._scan_comment(word):
+            if self._scan_comment(word):
+                return
+            if prev_space or single_token or not char:
+                self._advance(size - 1)
+                word = word.upper()
+                self._add(self.KEYWORDS[word], text=word)
+                return
+        if self._char in self.SINGLE_TOKENS:
+            self._add(self.SINGLE_TOKENS[self._char], text=self._char)
             return
-
-        self._advance(size - 1)
-        word = word.upper()
-        self._add(self.KEYWORDS[word], text=word)
+        self._scan_var()
 
     def _scan_comment(self, comment_start: str) -> bool:
         if comment_start not in self._COMMENTS:
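The reshaped flow in the hunk above tries strings and comments first whenever the trie matched a word, and commits to the keyword only when the match is followed by whitespace, a single-character token, or end of input; everything else falls through to the single-token check and `_scan_var`. A minimal sketch of the effect, assuming this revision of sqlglot is importable: bare `UNKNOWN` becomes a keyword token, while `unknown_col` stays an ordinary identifier.

```python
# Minimal sketch, assuming this revision of sqlglot is importable.
# A keyword-like prefix that runs into an identifier must not be emitted
# as the keyword; the prev_space/single_token/not-char guard routes it to
# _scan_var instead.
from sqlglot.tokens import Tokenizer, TokenType

keyword, ident = Tokenizer().tokenize("UNKNOWN unknown_col")
assert keyword.token_type == TokenType.UNKNOWN
assert ident.token_type == TokenType.VAR
```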
@@ -1053,8 +1066,8 @@ class Tokenizer(metaclass=_Tokenizer):
                     return self._add(token_type, literal)
                 elif self.IDENTIFIERS_CAN_START_WITH_DIGIT:
                     return self._add(TokenType.VAR)
 
-                self._add(TokenType.NUMBER, number_text)
-                return self._advance(-len(literal))
+                self._advance(-len(literal))
+                return self._add(TokenType.NUMBER, number_text)
             else:
                 return self._add(TokenType.NUMBER)
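Swapping `_advance` ahead of `_add` rewinds past the trailing literal before the NUMBER token is recorded, so the token's stored position no longer extends into the identifier that follows; the emitted token stream is unchanged. A minimal sketch, assuming this revision of sqlglot is importable:

```python
# Minimal sketch, assuming this revision of sqlglot is importable.
# "1x" splits into NUMBER "1" followed by VAR "x"; the reorder only fixes
# the position recorded for the NUMBER token.
from sqlglot.tokens import Tokenizer, TokenType

num, var = Tokenizer().tokenize("1x")
assert (num.token_type, num.text) == (TokenType.NUMBER, "1")
assert (var.token_type, var.text) == (TokenType.VAR, "x")
```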