Diffstat (limited to 'sqlglot/tokens.py')
 -rw-r--r-- sqlglot/tokens.py | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index e95057a..8cf17a7 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -357,7 +357,8 @@ class _Tokenizer(type):
klass._HEX_STRINGS = cls._delimeter_list_to_dict(klass.HEX_STRINGS)
klass._BYTE_STRINGS = cls._delimeter_list_to_dict(klass.BYTE_STRINGS)
klass._IDENTIFIERS = cls._delimeter_list_to_dict(klass.IDENTIFIERS)
- klass._ESCAPES = set(klass.ESCAPES)
+ klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
+ klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
klass._COMMENTS = dict(
(comment, None) if isinstance(comment, str) else (comment[0], comment[1])
for comment in klass.COMMENTS
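
The metaclass now precomputes two escape sets instead of one, so the scanner's per-character checks stay O(1) set lookups rather than list scans. A minimal standalone sketch of the pattern (hypothetical class names; the real _Tokenizer also builds quote, string, and comment tables):

    import typing as t

    class _Meta(type):
        def __new__(mcs, name, bases, namespace):
            klass = super().__new__(mcs, name, bases, namespace)
            # Cache the class-level lists as sets once, at class creation time.
            klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
            klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
            return klass

    class MiniTokenizer(metaclass=_Meta):
        STRING_ESCAPES: t.List[str] = ["'"]
        IDENTIFIER_ESCAPES: t.List[str] = ['"']

    print(MiniTokenizer._STRING_ESCAPES)      # {"'"}
    print(MiniTokenizer._IDENTIFIER_ESCAPES)  # {'"'}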
@@ -429,9 +430,13 @@ class Tokenizer(metaclass=_Tokenizer):
IDENTIFIERS: t.List[str | t.Tuple[str, str]] = ['"']
- ESCAPES = ["'"]
+ STRING_ESCAPES = ["'"]
- _ESCAPES: t.Set[str] = set()
+ _STRING_ESCAPES: t.Set[str] = set()
+
+ IDENTIFIER_ESCAPES = ['"']
+
+ _IDENTIFIER_ESCAPES: t.Set[str] = set()
KEYWORDS = {
**{
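
Splitting ESCAPES into STRING_ESCAPES and IDENTIFIER_ESCAPES lets dialect tokenizers override the two independently. For instance, building on the MiniTokenizer sketch above (a hypothetical subclass, not part of this commit):

    class BackslashTokenizer(MiniTokenizer):
        # Escape strings with backslash or a doubled quote, but keep the
        # standard doubled double-quote escape for identifiers.
        STRING_ESCAPES = ["'", "\\"]
        IDENTIFIER_ESCAPES = ['"']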
@@ -469,6 +474,7 @@ class Tokenizer(metaclass=_Tokenizer):
"ASC": TokenType.ASC,
"AS": TokenType.ALIAS,
"AT TIME ZONE": TokenType.AT_TIME_ZONE,
+ "AUTOINCREMENT": TokenType.AUTO_INCREMENT,
"AUTO_INCREMENT": TokenType.AUTO_INCREMENT,
"BEGIN": TokenType.BEGIN,
"BETWEEN": TokenType.BETWEEN,
@@ -691,6 +697,7 @@ class Tokenizer(metaclass=_Tokenizer):
"ALTER VIEW": TokenType.COMMAND,
"ANALYZE": TokenType.COMMAND,
"CALL": TokenType.COMMAND,
+ "COPY": TokenType.COMMAND,
"EXPLAIN": TokenType.COMMAND,
"OPTIMIZE": TokenType.COMMAND,
"PREPARE": TokenType.COMMAND,
@@ -744,7 +751,7 @@ class Tokenizer(metaclass=_Tokenizer):
)
def __init__(self) -> None:
- self._replace_backslash = "\\" in self._ESCAPES
+ self._replace_backslash = "\\" in self._STRING_ESCAPES
self.reset()
def reset(self) -> None:
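
The constructor's one-time backslash check now consults only the string-escape set; identifier escapes can never enable it. The flag remains a cached membership test, roughly:

    # Standalone equivalent (hypothetical dialect values):
    STRING_ESCAPES = {"'", "\\"}
    replace_backslash = "\\" in STRING_ESCAPES  # True here; False for the default tokenizer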
@@ -1046,12 +1053,25 @@ class Tokenizer(metaclass=_Tokenizer):
return True
def _scan_identifier(self, identifier_end: str) -> None:
- while self._peek != identifier_end:
+ text = ""
+ identifier_end_is_escape = identifier_end in self._IDENTIFIER_ESCAPES
+
+ while True:
if self._end:
raise RuntimeError(f"Missing {identifier_end} from {self._line}:{self._start}")
+
self._advance()
- self._advance()
- self._add(TokenType.IDENTIFIER, self._text[1:-1])
+ if self._char == identifier_end:
+ if identifier_end_is_escape and self._peek == identifier_end:
+ text += identifier_end # type: ignore
+ self._advance()
+ continue
+
+ break
+
+ text += self._char # type: ignore
+
+ self._add(TokenType.IDENTIFIER, text)
def _scan_var(self) -> None:
while True:
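
The rewritten _scan_identifier builds the text character by character so that a doubled closing delimiter inside a quoted identifier collapses to one literal delimiter instead of ending the scan. A standalone sketch of the same loop (assuming, as in the default tokenizer, that the delimiter itself is the escape):

    import typing as t

    def scan_identifier(sql: str, start: int, end_char: str, escapes: t.Set[str]) -> t.Tuple[str, int]:
        # Scan sql from just past the opening delimiter at start - 1; return
        # the unescaped text and the index after the closing delimiter.
        text = ""
        end_is_escape = end_char in escapes
        i = start
        while True:
            if i >= len(sql):
                raise RuntimeError(f"Missing {end_char}")
            char = sql[i]
            i += 1
            if char == end_char:
                # A doubled delimiter is an escaped literal, not the end.
                if end_is_escape and i < len(sql) and sql[i] == end_char:
                    text += end_char
                    i += 1
                    continue
                break
            text += char
        return text, i

    print(scan_identifier('"an ""odd"" name" rest', 1, '"', {'"'}))
    # -> ('an "odd" name', 17)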
@@ -1072,9 +1092,9 @@ class Tokenizer(metaclass=_Tokenizer):
while True:
if (
- self._char in self._ESCAPES
+ self._char in self._STRING_ESCAPES
and self._peek
- and (self._peek == delimiter or self._peek in self._ESCAPES)
+ and (self._peek == delimiter or self._peek in self._STRING_ESCAPES)
):
text += self._peek
self._advance(2)
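
_scan_string keeps the same escape logic under the new name: a character counts as an escape only when the set says so and the next character is either the closing delimiter or another escape, in which case the escaped character is kept literally. A condensed standalone sketch:

    import typing as t

    def scan_string(sql: str, start: int, delimiter: str, escapes: t.Set[str]) -> str:
        # Scan a quoted string starting at the opening delimiter; return its text.
        text = ""
        i = start + 1
        while i < len(sql):
            char = sql[i]
            peek = sql[i + 1] if i + 1 < len(sql) else ""
            if char in escapes and peek and (peek == delimiter or peek in escapes):
                text += peek  # keep the escaped character, drop the escape
                i += 2
            elif char == delimiter:
                return text  # an unescaped delimiter closes the string
            else:
                text += char
                i += 1
        raise RuntimeError(f"Missing {delimiter}")

    print(scan_string("'it''s fine' tail", 0, "'", {"'"}))  # -> it's fine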