Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r--  sqlglot/tokens.py | 53
1 file changed, 26 insertions(+), 27 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 9b29c12..f3f1a70 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -115,6 +115,7 @@ class TokenType(AutoName):
     IMAGE = auto()
     VARIANT = auto()
     OBJECT = auto()
+    INET = auto()
 
     # keywords
     ALIAS = auto()
@@ -437,16 +438,8 @@ class Tokenizer(metaclass=_Tokenizer):
     _IDENTIFIER_ESCAPES: t.Set[str] = set()
 
     KEYWORDS = {
-        **{
-            f"{key}{postfix}": TokenType.BLOCK_START
-            for key in ("{%", "{#")
-            for postfix in ("", "+", "-")
-        },
-        **{
-            f"{prefix}{key}": TokenType.BLOCK_END
-            for key in ("%}", "#}")
-            for prefix in ("", "+", "-")
-        },
+        **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
+        **{f"{prefix}%}}": TokenType.BLOCK_END for prefix in ("", "+", "-")},
         "{{+": TokenType.BLOCK_START,
         "{{-": TokenType.BLOCK_START,
         "+}}": TokenType.BLOCK_END,
@@ -533,6 +526,7 @@ class Tokenizer(metaclass=_Tokenizer):
"IGNORE NULLS": TokenType.IGNORE_NULLS,
"IN": TokenType.IN,
"INDEX": TokenType.INDEX,
+ "INET": TokenType.INET,
"INNER": TokenType.INNER,
"INSERT": TokenType.INSERT,
"INTERVAL": TokenType.INTERVAL,
@@ -701,7 +695,7 @@ class Tokenizer(metaclass=_Tokenizer):
"VACUUM": TokenType.COMMAND,
}
- WHITE_SPACE = {
+ WHITE_SPACE: t.Dict[str, TokenType] = {
" ": TokenType.SPACE,
"\t": TokenType.SPACE,
"\n": TokenType.BREAK,
@@ -723,7 +717,7 @@ class Tokenizer(metaclass=_Tokenizer):
     NUMERIC_LITERALS: t.Dict[str, str] = {}
     ENCODE: t.Optional[str] = None
 
-    COMMENTS = ["--", ("/*", "*/")]
+    COMMENTS = ["--", ("/*", "*/"), ("{#", "#}")]
     KEYWORD_TRIE = None  # autofilled
 
     IDENTIFIER_CAN_START_WITH_DIGIT = False
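
Registering `("{#", "#}")` in COMMENTS means Jinja-style comments are consumed by the comment scanner and attached to a neighboring token, instead of producing BLOCK_START/BLOCK_END tokens. A rough illustration; the exact attachment and comment text are version-dependent:

```python
# Rough illustration, assuming the patched module: a "{# ... #}" span is now
# collected into a token's comments rather than emitted as block tokens.
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("SELECT 1 {# jinja comment #}")
print([(t.token_type.name, t.comments) for t in tokens])
# e.g. [('SELECT', []), ('NUMBER', [' jinja comment '])]
```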
@@ -778,22 +772,16 @@ class Tokenizer(metaclass=_Tokenizer):
             self._start = self._current
             self._advance()
 
-            if not self._char:
+            if self._char is None:
                 break
 
-            white_space = self.WHITE_SPACE.get(self._char)  # type: ignore
-            identifier_end = self._IDENTIFIERS.get(self._char)  # type: ignore
-
-            if white_space:
-                if white_space == TokenType.BREAK:
-                    self._col = 1
-                    self._line += 1
-            elif self._char.isdigit():  # type:ignore
-                self._scan_number()
-            elif identifier_end:
-                self._scan_identifier(identifier_end)
-            else:
-                self._scan_keywords()
+            if self._char not in self.WHITE_SPACE:
+                if self._char.isdigit():
+                    self._scan_number()
+                elif self._char in self._IDENTIFIERS:
+                    self._scan_identifier(self._IDENTIFIERS[self._char])
+                else:
+                    self._scan_keywords()
 
             if until and until():
                 break
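
The rewritten loop collapses the old `white_space`/`identifier_end` locals into direct membership tests: whitespace is skipped (line counting now lives in `_advance`, next hunk), digits dispatch to `_scan_number`, identifier delimiters to `_scan_identifier`, and everything else to `_scan_keywords`. A small sketch of the resulting dispatch:

```python
# Small sketch, assuming the patched module: digits, quoted identifiers, and
# keywords each take their own path through _scan().
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize('42 "col name" SELECT')
print([t.token_type.name for t in tokens])
# e.g. ['NUMBER', 'IDENTIFIER', 'SELECT']
```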
@@ -807,13 +795,23 @@ class Tokenizer(metaclass=_Tokenizer):
             return self.sql[start:end]
         return ""
 
+    def _line_break(self, char: t.Optional[str]) -> bool:
+        return self.WHITE_SPACE.get(char) == TokenType.BREAK  # type: ignore
+
     def _advance(self, i: int = 1) -> None:
+        if self._line_break(self._char):
+            self._set_new_line()
+
         self._col += i
         self._current += i
         self._end = self._current >= self.size  # type: ignore
         self._char = self.sql[self._current - 1]  # type: ignore
         self._peek = self.sql[self._current] if self._current < self.size else ""  # type: ignore
 
+    def _set_new_line(self) -> None:
+        self._col = 1
+        self._line += 1
+
     @property
     def _text(self) -> str:
         return self.sql[self._start : self._current]
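
Line/column bookkeeping moves out of the scan loop: `_advance` now checks whether the character it is stepping past was a line break and, if so, resets the column and bumps the line counter via the new `_set_new_line` helper. Token positions should therefore still come out right:

```python
# Hedged check, assuming the patched module: Token.line / Token.col should
# still track newlines now that _advance() handles BREAK characters itself.
from sqlglot.tokens import Tokenizer

for token in Tokenizer().tokenize("SELECT 1\nFROM t"):
    print(token.token_type.name, token.line, token.col)
# expect the FROM and t tokens to report line 2
```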
@@ -917,7 +915,7 @@ class Tokenizer(metaclass=_Tokenizer):
             self._comments.append(self._text[comment_start_size : -comment_end_size + 1])  # type: ignore
             self._advance(comment_end_size - 1)
         else:
-            while not self._end and self.WHITE_SPACE.get(self._peek) != TokenType.BREAK:  # type: ignore
+            while not self._end and not self._line_break(self._peek):
                 self._advance()
 
             self._comments.append(self._text[comment_start_size:])  # type: ignore
@@ -926,6 +924,7 @@ class Tokenizer(metaclass=_Tokenizer):
         if comment_start_line == self._prev_token_line:
             self.tokens[-1].comments.extend(self._comments)
             self._comments = []
+            self._prev_token_line = self._line
 
         return True
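
Updating `_prev_token_line` after a trailing comment is absorbed keeps any further comments on the same logical line attached to the same preceding token. A rough demonstration:

```python
# Rough demonstration, assuming the patched module: a trailing "--" comment
# attaches to the preceding token's comments list.
from sqlglot.tokens import Tokenizer

tokens = Tokenizer().tokenize("SELECT 1 -- the answer")
print(tokens[-1].comments)  # e.g. [' the answer']
```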