From d3bb537b2b73788ba06bf4158f473ecc5bb556cc Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Tue, 17 Jan 2023 11:32:16 +0100
Subject: Merging upstream version 10.5.2.

Signed-off-by: Daniel Baumann
---
 sqlglot/tokens.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'sqlglot/tokens.py')

diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 0efa7d0..8e312a7 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -86,6 +86,7 @@ class TokenType(AutoName):
     VARBINARY = auto()
     JSON = auto()
     JSONB = auto()
+    TIME = auto()
     TIMESTAMP = auto()
     TIMESTAMPTZ = auto()
     TIMESTAMPLTZ = auto()
@@ -181,6 +182,7 @@ class TokenType(AutoName):
     FUNCTION = auto()
     FROM = auto()
     GENERATED = auto()
+    GLOBAL = auto()
     GROUP_BY = auto()
     GROUPING_SETS = auto()
     HAVING = auto()
@@ -656,6 +658,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "FLOAT4": TokenType.FLOAT,
         "FLOAT8": TokenType.DOUBLE,
         "DOUBLE": TokenType.DOUBLE,
+        "DOUBLE PRECISION": TokenType.DOUBLE,
         "JSON": TokenType.JSON,
         "CHAR": TokenType.CHAR,
         "NCHAR": TokenType.NCHAR,
@@ -671,6 +674,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "BLOB": TokenType.VARBINARY,
         "BYTEA": TokenType.VARBINARY,
         "VARBINARY": TokenType.VARBINARY,
+        "TIME": TokenType.TIME,
         "TIMESTAMP": TokenType.TIMESTAMP,
         "TIMESTAMPTZ": TokenType.TIMESTAMPTZ,
         "TIMESTAMPLTZ": TokenType.TIMESTAMPLTZ,
@@ -721,6 +725,8 @@ class Tokenizer(metaclass=_Tokenizer):
     COMMENTS = ["--", ("/*", "*/")]
     KEYWORD_TRIE = None  # autofilled
 
+    IDENTIFIER_CAN_START_WITH_DIGIT = False
+
     __slots__ = (
         "sql",
         "size",
@@ -938,17 +944,24 @@ class Tokenizer(metaclass=_Tokenizer):
         elif self._peek.upper() == "E" and not scientific:  # type: ignore
             scientific += 1
             self._advance()
-        elif self._peek.isalpha():  # type: ignore
-            self._add(TokenType.NUMBER)
+        elif self._peek.isidentifier():  # type: ignore
+            number_text = self._text
             literal = []
-            while self._peek.isalpha():  # type: ignore
+            while self._peek.isidentifier():  # type: ignore
                 literal.append(self._peek.upper())  # type: ignore
                 self._advance()
+
             literal = "".join(literal)  # type: ignore
             token_type = self.KEYWORDS.get(self.NUMERIC_LITERALS.get(literal))  # type: ignore
+
             if token_type:
+                self._add(TokenType.NUMBER, number_text)
                 self._add(TokenType.DCOLON, "::")
                 return self._add(token_type, literal)  # type: ignore
+            elif self.IDENTIFIER_CAN_START_WITH_DIGIT:
+                return self._add(TokenType.VAR)
+
+            self._add(TokenType.NUMBER, number_text)
             return self._advance(-len(literal))
         else:
             return self._add(TokenType.NUMBER)
-- 
cgit v1.2.3