diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-01-23 08:42:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-01-23 08:42:55 +0000 |
commit | ade4a78e8fabcaa7270b6d4be2187457a3fa115f (patch) | |
tree | 018225e76010479b3a568bb6d9ef5df457802885 /sqlglot/tokens.py | |
parent | Adding upstream version 10.5.2. (diff) | |
download | sqlglot-c6fb255c17fb08417188ad2b438a6e8d85ce331b.tar.xz sqlglot-c6fb255c17fb08417188ad2b438a6e8d85ce331b.zip |
Adding upstream version 10.5.6. (tag: upstream/10.5.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r-- | sqlglot/tokens.py | 40 |
1 file changed, 30 insertions(+), 10 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index 8e312a7..f12528f 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -82,6 +82,8 @@ class TokenType(AutoName): VARCHAR = auto() NVARCHAR = auto() TEXT = auto() + MEDIUMTEXT = auto() + LONGTEXT = auto() BINARY = auto() VARBINARY = auto() JSON = auto() @@ -434,6 +436,8 @@ class Tokenizer(metaclass=_Tokenizer): ESCAPES = ["'"] + _ESCAPES: t.Set[str] = set() + KEYWORDS = { **{ f"{key}{postfix}": TokenType.BLOCK_START @@ -461,6 +465,7 @@ class Tokenizer(metaclass=_Tokenizer): "#>>": TokenType.DHASH_ARROW, "<->": TokenType.LR_ARROW, "ALL": TokenType.ALL, + "ALWAYS": TokenType.ALWAYS, "AND": TokenType.AND, "ANTI": TokenType.ANTI, "ANY": TokenType.ANY, @@ -472,6 +477,7 @@ class Tokenizer(metaclass=_Tokenizer): "BETWEEN": TokenType.BETWEEN, "BOTH": TokenType.BOTH, "BUCKET": TokenType.BUCKET, + "BY DEFAULT": TokenType.BY_DEFAULT, "CACHE": TokenType.CACHE, "UNCACHE": TokenType.UNCACHE, "CASE": TokenType.CASE, @@ -521,9 +527,11 @@ class Tokenizer(metaclass=_Tokenizer): "FOREIGN KEY": TokenType.FOREIGN_KEY, "FORMAT": TokenType.FORMAT, "FROM": TokenType.FROM, + "GENERATED": TokenType.GENERATED, "GROUP BY": TokenType.GROUP_BY, "GROUPING SETS": TokenType.GROUPING_SETS, "HAVING": TokenType.HAVING, + "IDENTITY": TokenType.IDENTITY, "IF": TokenType.IF, "ILIKE": TokenType.ILIKE, "IMMUTABLE": TokenType.IMMUTABLE, @@ -746,7 +754,7 @@ class Tokenizer(metaclass=_Tokenizer): ) def __init__(self) -> None: - self._replace_backslash = "\\" in self._ESCAPES # type: ignore + self._replace_backslash = "\\" in self._ESCAPES self.reset() def reset(self) -> None: @@ -771,7 +779,10 @@ class Tokenizer(metaclass=_Tokenizer): self.reset() self.sql = sql self.size = len(sql) + self._scan() + return self.tokens + def _scan(self, until: t.Optional[t.Callable] = None) -> None: while self.size and not self._end: self._start = self._current self._advance() @@ -792,7 +803,9 @@ class Tokenizer(metaclass=_Tokenizer): 
self._scan_identifier(identifier_end) else: self._scan_keywords() - return self.tokens + + if until and until(): + break def _chars(self, size: int) -> str: if size == 1: @@ -832,11 +845,13 @@ class Tokenizer(metaclass=_Tokenizer): if token_type in self.COMMANDS and ( len(self.tokens) == 1 or self.tokens[-2].token_type == TokenType.SEMICOLON ): - self._start = self._current - while not self._end and self._peek != ";": - self._advance() - if self._start < self._current: - self._add(TokenType.STRING) + start = self._current + tokens = len(self.tokens) + self._scan(lambda: self._peek == ";") + self.tokens = self.tokens[:tokens] + text = self.sql[start : self._current].strip() + if text: + self._add(TokenType.STRING, text) def _scan_keywords(self) -> None: size = 0 @@ -947,7 +962,8 @@ class Tokenizer(metaclass=_Tokenizer): elif self._peek.isidentifier(): # type: ignore number_text = self._text literal = [] - while self._peek.isidentifier(): # type: ignore + + while self._peek.strip() and self._peek not in self.SINGLE_TOKENS: # type: ignore literal.append(self._peek.upper()) # type: ignore self._advance() @@ -1063,8 +1079,12 @@ class Tokenizer(metaclass=_Tokenizer): delim_size = len(delimiter) while True: - if self._char in self._ESCAPES and self._peek == delimiter: # type: ignore - text += delimiter + if ( + self._char in self._ESCAPES + and self._peek + and (self._peek == delimiter or self._peek in self._ESCAPES) + ): + text += self._peek self._advance(2) else: if self._chars(delim_size) == delimiter: |