Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r--  sqlglot/tokens.py | 45
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index ec8cd91..8a7a38e 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -81,6 +81,7 @@ class TokenType(AutoName):
BINARY = auto()
VARBINARY = auto()
JSON = auto()
+ JSONB = auto()
TIMESTAMP = auto()
TIMESTAMPTZ = auto()
TIMESTAMPLTZ = auto()
@@ -91,6 +92,7 @@ class TokenType(AutoName):
NULLABLE = auto()
GEOMETRY = auto()
HLLSKETCH = auto()
+ HSTORE = auto()
SUPER = auto()
SERIAL = auto()
SMALLSERIAL = auto()
@@ -113,6 +115,7 @@ class TokenType(AutoName):
APPLY = auto()
ARRAY = auto()
ASC = auto()
+ ASOF = auto()
AT_TIME_ZONE = auto()
AUTO_INCREMENT = auto()
BEGIN = auto()
@@ -130,6 +133,7 @@ class TokenType(AutoName):
COMMAND = auto()
COMMENT = auto()
COMMIT = auto()
+ COMPOUND = auto()
CONSTRAINT = auto()
CREATE = auto()
CROSS = auto()
@@ -271,6 +275,7 @@ class TokenType(AutoName):
UNBOUNDED = auto()
UNCACHE = auto()
UNION = auto()
+ UNLOGGED = auto()
UNNEST = auto()
UNPIVOT = auto()
UPDATE = auto()
@@ -291,7 +296,7 @@ class TokenType(AutoName):
class Token:
- __slots__ = ("token_type", "text", "line", "col", "comment")
+ __slots__ = ("token_type", "text", "line", "col", "comments")
@classmethod
def number(cls, number: int) -> Token:
@@ -319,13 +324,13 @@ class Token:
text: str,
line: int = 1,
col: int = 1,
- comment: t.Optional[str] = None,
+ comments: t.List[str] = [],
) -> None:
self.token_type = token_type
self.text = text
self.line = line
self.col = max(col - len(text), 1)
- self.comment = comment
+ self.comments = comments
def __repr__(self) -> str:
attributes = ", ".join(f"{k}: {getattr(self, k)}" for k in self.__slots__)
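A minimal usage sketch (hypothetical, not part of the diff) of the widened field: a Token now carries a list of comments instead of a single optional string. Note that the mutable [] default is evaluated only once in Python; the tokenizer sidesteps this by always passing an explicit list, as the _add hunk further down shows.

    from sqlglot.tokens import Token, TokenType

    # Attach two consecutive comments to one token; previously only a
    # single Optional[str] comment could be stored.
    token = Token(TokenType.SELECT, "SELECT", comments=["first", "second"])
    print(token.comments)  # ['first', 'second']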
@@ -452,6 +457,7 @@ class Tokenizer(metaclass=_Tokenizer):
"COLLATE": TokenType.COLLATE,
"COMMENT": TokenType.SCHEMA_COMMENT,
"COMMIT": TokenType.COMMIT,
+ "COMPOUND": TokenType.COMPOUND,
"CONSTRAINT": TokenType.CONSTRAINT,
"CREATE": TokenType.CREATE,
"CROSS": TokenType.CROSS,
@@ -582,8 +588,9 @@ class Tokenizer(metaclass=_Tokenizer):
"TRAILING": TokenType.TRAILING,
"UNBOUNDED": TokenType.UNBOUNDED,
"UNION": TokenType.UNION,
- "UNPIVOT": TokenType.UNPIVOT,
+ "UNLOGGED": TokenType.UNLOGGED,
"UNNEST": TokenType.UNNEST,
+ "UNPIVOT": TokenType.UNPIVOT,
"UPDATE": TokenType.UPDATE,
"USE": TokenType.USE,
"USING": TokenType.USING,
@@ -686,12 +693,12 @@ class Tokenizer(metaclass=_Tokenizer):
"_current",
"_line",
"_col",
- "_comment",
+ "_comments",
"_char",
"_end",
"_peek",
"_prev_token_line",
- "_prev_token_comment",
+ "_prev_token_comments",
"_prev_token_type",
"_replace_backslash",
)
@@ -708,13 +715,13 @@ class Tokenizer(metaclass=_Tokenizer):
self._current = 0
self._line = 1
self._col = 1
- self._comment = None
+ self._comments: t.List[str] = []
self._char = None
self._end = None
self._peek = None
self._prev_token_line = -1
- self._prev_token_comment = None
+ self._prev_token_comments: t.List[str] = []
self._prev_token_type = None
def tokenize(self, sql: str) -> t.List[Token]:
@@ -767,7 +774,7 @@ class Tokenizer(metaclass=_Tokenizer):
def _add(self, token_type: TokenType, text: t.Optional[str] = None) -> None:
self._prev_token_line = self._line
- self._prev_token_comment = self._comment
+ self._prev_token_comments = self._comments
self._prev_token_type = token_type # type: ignore
self.tokens.append(
Token(
@@ -775,10 +782,10 @@ class Tokenizer(metaclass=_Tokenizer):
self._text if text is None else text,
self._line,
self._col,
- self._comment,
+ self._comments,
)
)
- self._comment = None
+ self._comments = []
if token_type in self.COMMANDS and (
len(self.tokens) == 1 or self.tokens[-2].token_type == TokenType.SEMICOLON
@@ -857,22 +864,18 @@ class Tokenizer(metaclass=_Tokenizer):
while not self._end and self._chars(comment_end_size) != comment_end:
self._advance()
- self._comment = self._text[comment_start_size : -comment_end_size + 1] # type: ignore
+ self._comments.append(self._text[comment_start_size : -comment_end_size + 1]) # type: ignore
self._advance(comment_end_size - 1)
else:
while not self._end and self.WHITE_SPACE.get(self._peek) != TokenType.BREAK: # type: ignore
self._advance()
- self._comment = self._text[comment_start_size:] # type: ignore
-
- # Leading comment is attached to the succeeding token, whilst trailing comment to the preceding. If both
- # types of comment can be attached to a token, the trailing one is discarded in favour of the leading one.
+ self._comments.append(self._text[comment_start_size:]) # type: ignore
+ # Leading comment is attached to the succeeding token, whilst trailing comment to the preceding.
+ # Multiple consecutive comments are preserved by appending them to the current comments list.
if comment_start_line == self._prev_token_line:
- if self._prev_token_comment is None:
- self.tokens[-1].comment = self._comment
- self._prev_token_comment = self._comment
-
- self._comment = None
+ self.tokens[-1].comments.extend(self._comments)
+ self._comments = []
return True
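To illustrate the behavioral change end to end, a minimal sketch (assuming this revision): consecutive comments now all survive on the token they attach to, whereas the old code kept at most one and discarded a trailing comment when a leading one was already present.

    from sqlglot.tokens import Tokenizer

    tokens = Tokenizer().tokenize("SELECT 1 /* one */ /* two */")
    # Both comments trail the literal on the same line, so they attach to it.
    print(tokens[1].comments)  # [' one ', ' two ']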