author    Daniel Baumann <daniel.baumann@progress-linux.org> 2023-01-23 08:43:00 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2023-01-23 08:43:00 +0000
commit    2228e192dc1a582aa2ae004f20c692f6c7aeb853 (patch)
tree      9308157255d0d746263d77fa71bd3cb29fded23b /sqlglot/tokens.py
parent    Releasing debian version 10.5.2-1. (diff)
Merging upstream version 10.5.6.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r--  sqlglot/tokens.py | 40
1 file changed, 30 insertions(+), 10 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 8e312a7..f12528f 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -82,6 +82,8 @@ class TokenType(AutoName):
     VARCHAR = auto()
     NVARCHAR = auto()
     TEXT = auto()
+    MEDIUMTEXT = auto()
+    LONGTEXT = auto()
     BINARY = auto()
     VARBINARY = auto()
     JSON = auto()
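
The two new token types back MySQL's larger TEXT variants. A minimal sketch of
how they would surface, assuming the MySQL dialect tokenizer maps the bare
MEDIUMTEXT/LONGTEXT keywords onto these new types (as the matching upstream
dialect change does):

    from sqlglot.dialects.mysql import MySQL
    from sqlglot.tokens import TokenType

    # Assumes the MySQL dialect keywords MEDIUMTEXT/LONGTEXT map to these types.
    tokens = MySQL.Tokenizer().tokenize("CREATE TABLE t (c MEDIUMTEXT, d LONGTEXT)")
    token_types = {token.token_type for token in tokens}
    assert TokenType.MEDIUMTEXT in token_types and TokenType.LONGTEXT in token_types
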
@@ -434,6 +436,8 @@ class Tokenizer(metaclass=_Tokenizer):
     ESCAPES = ["'"]
 
+    _ESCAPES: t.Set[str] = set()
+
     KEYWORDS = {
         **{
             f"{key}{postfix}": TokenType.BLOCK_START
@@ -461,6 +465,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "#>>": TokenType.DHASH_ARROW,
         "<->": TokenType.LR_ARROW,
         "ALL": TokenType.ALL,
+        "ALWAYS": TokenType.ALWAYS,
         "AND": TokenType.AND,
         "ANTI": TokenType.ANTI,
         "ANY": TokenType.ANY,
@@ -472,6 +477,7 @@ class Tokenizer(metaclass=_Tokenizer):
         "BETWEEN": TokenType.BETWEEN,
         "BOTH": TokenType.BOTH,
         "BUCKET": TokenType.BUCKET,
+        "BY DEFAULT": TokenType.BY_DEFAULT,
         "CACHE": TokenType.CACHE,
         "UNCACHE": TokenType.UNCACHE,
         "CASE": TokenType.CASE,
@@ -521,9 +527,11 @@ class Tokenizer(metaclass=_Tokenizer):
         "FOREIGN KEY": TokenType.FOREIGN_KEY,
         "FORMAT": TokenType.FORMAT,
         "FROM": TokenType.FROM,
+        "GENERATED": TokenType.GENERATED,
         "GROUP BY": TokenType.GROUP_BY,
         "GROUPING SETS": TokenType.GROUPING_SETS,
         "HAVING": TokenType.HAVING,
+        "IDENTITY": TokenType.IDENTITY,
         "IF": TokenType.IF,
         "ILIKE": TokenType.ILIKE,
         "IMMUTABLE": TokenType.IMMUTABLE,
@@ -746,7 +754,7 @@ class Tokenizer(metaclass=_Tokenizer):
     )
 
     def __init__(self) -> None:
-        self._replace_backslash = "\\" in self._ESCAPES  # type: ignore
+        self._replace_backslash = "\\" in self._ESCAPES
         self.reset()
 
     def reset(self) -> None:
@@ -771,7 +779,10 @@ class Tokenizer(metaclass=_Tokenizer):
         self.reset()
         self.sql = sql
         self.size = len(sql)
+        self._scan()
+        return self.tokens
 
+    def _scan(self, until: t.Optional[t.Callable] = None) -> None:
         while self.size and not self._end:
             self._start = self._current
             self._advance()
@@ -792,7 +803,9 @@ class Tokenizer(metaclass=_Tokenizer):
                 self._scan_identifier(identifier_end)
             else:
                 self._scan_keywords()
-        return self.tokens
+
+            if until and until():
+                break
 
     def _chars(self, size: int) -> str:
         if size == 1:
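
The tokenize/_scan split is behavior-preserving for plain input, but the new
until callback lets a caller stop the loop mid-stream; the COMMANDS hunk below
relies on exactly that. A hedged illustration that drives the private hook
directly (internal API, for illustration only):

    from sqlglot.tokens import Tokenizer

    tokenizer = Tokenizer()
    tokenizer.reset()
    tokenizer.sql = "SELECT 1; SELECT 2"
    tokenizer.size = len(tokenizer.sql)

    # Stop as soon as the lookahead character is the statement separator,
    # mirroring what tokenize() does minus the early exit.
    tokenizer._scan(lambda: tokenizer._peek == ";")
    print([token.text for token in tokenizer.tokens])  # ['SELECT', '1']
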
@@ -832,11 +845,13 @@ class Tokenizer(metaclass=_Tokenizer):
         if token_type in self.COMMANDS and (
             len(self.tokens) == 1 or self.tokens[-2].token_type == TokenType.SEMICOLON
         ):
-            self._start = self._current
-            while not self._end and self._peek != ";":
-                self._advance()
-            if self._start < self._current:
-                self._add(TokenType.STRING)
+            start = self._current
+            tokens = len(self.tokens)
+            self._scan(lambda: self._peek == ";")
+            self.tokens = self.tokens[:tokens]
+            text = self.sql[start : self._current].strip()
+            if text:
+                self._add(TokenType.STRING, text)
 
     def _scan_keywords(self) -> None:
         size = 0
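
Commands previously slurped raw text up to the next semicolon; routing them
through _scan first means strings and comments in the argument are consumed
with full fidelity, after which the provisional tokens are discarded and the
stripped source slice is re-added as a single STRING. A sketch of the
observable result, assuming SHOW is in COMMANDS for the base tokenizer:

    from sqlglot.tokens import Tokenizer, TokenType

    tokens = Tokenizer().tokenize("SHOW TABLES LIKE 'x%'")
    assert tokens[0].token_type == TokenType.SHOW
    # The whole argument collapses into one STRING token.
    assert tokens[1].token_type == TokenType.STRING
    assert tokens[1].text == "TABLES LIKE 'x%'"
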
@@ -947,7 +962,8 @@ class Tokenizer(metaclass=_Tokenizer):
         elif self._peek.isidentifier():  # type: ignore
             number_text = self._text
             literal = []
-            while self._peek.isidentifier():  # type: ignore
+
+            while self._peek.strip() and self._peek not in self.SINGLE_TOKENS:  # type: ignore
                 literal.append(self._peek.upper())  # type: ignore
                 self._advance()
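
Loosening the suffix loop from isidentifier() to "anything that is neither
whitespace nor a single-char token" lets dialect NUMERIC_LITERALS suffixes
match while still stopping at operators and parentheses. A hedged sketch
against the Hive dialect, assuming its NUMERIC_LITERALS maps "D" to DOUBLE as
upstream does:

    from sqlglot.dialects.hive import Hive

    # Expected: the number 1.5 followed by a DOUBLE type token.
    tokens = Hive.Tokenizer().tokenize("SELECT 1.5D")
    print([(token.token_type, token.text) for token in tokens])
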
@@ -1063,8 +1079,12 @@ class Tokenizer(metaclass=_Tokenizer):
         delim_size = len(delimiter)
 
         while True:
-            if self._char in self._ESCAPES and self._peek == delimiter:  # type: ignore
-                text += delimiter
+            if (
+                self._char in self._ESCAPES
+                and self._peek
+                and (self._peek == delimiter or self._peek in self._ESCAPES)
+            ):
+                text += self._peek
                 self._advance(2)
             else:
                 if self._chars(delim_size) == delimiter:
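
Previously an escape character only counted when it preceded the closing
delimiter; it now also escapes another escape character, so a doubled
backslash inside a string becomes a literal backslash instead of derailing
the scan. A sketch of the effect, assuming the MySQL dialect lists the
backslash in its ESCAPES:

    from sqlglot.dialects.mysql import MySQL

    # The SQL text is: SELECT '\\'  (a string holding one escaped backslash)
    tokens = MySQL.Tokenizer().tokenize("SELECT '\\\\'")
    print(tokens[-1].text)  # the STRING token's text, escape consumed
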