diff options
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r-- | sqlglot/tokens.py | 40 |
1 files changed, 19 insertions, 21 deletions
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index 8cf17a7..9b29c12 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -138,7 +138,6 @@ class TokenType(AutoName): CASCADE = auto() CASE = auto() CHARACTER_SET = auto() - CHECK = auto() CLUSTER_BY = auto() COLLATE = auto() COMMAND = auto() @@ -164,7 +163,6 @@ class TokenType(AutoName): DIV = auto() DROP = auto() ELSE = auto() - ENCODE = auto() END = auto() ESCAPE = auto() EXCEPT = auto() @@ -182,17 +180,16 @@ class TokenType(AutoName): FROM = auto() FULL = auto() FUNCTION = auto() - GENERATED = auto() GLOB = auto() GLOBAL = auto() GROUP_BY = auto() GROUPING_SETS = auto() HAVING = auto() HINT = auto() - IDENTITY = auto() IF = auto() IGNORE_NULLS = auto() ILIKE = auto() + ILIKE_ANY = auto() IN = auto() INDEX = auto() INNER = auto() @@ -211,6 +208,7 @@ class TokenType(AutoName): LEADING = auto() LEFT = auto() LIKE = auto() + LIKE_ANY = auto() LIMIT = auto() LOAD_DATA = auto() LOCAL = auto() @@ -253,6 +251,7 @@ class TokenType(AutoName): RECURSIVE = auto() REPLACE = auto() RESPECT_NULLS = auto() + RETURNING = auto() REFERENCES = auto() RIGHT = auto() RLIKE = auto() @@ -260,7 +259,6 @@ class TokenType(AutoName): ROLLUP = auto() ROW = auto() ROWS = auto() - SCHEMA_COMMENT = auto() SEED = auto() SELECT = auto() SEMI = auto() @@ -441,7 +439,7 @@ class Tokenizer(metaclass=_Tokenizer): KEYWORDS = { **{ f"{key}{postfix}": TokenType.BLOCK_START - for key in ("{{", "{%", "{#") + for key in ("{%", "{#") for postfix in ("", "+", "-") }, **{ @@ -449,6 +447,8 @@ class Tokenizer(metaclass=_Tokenizer): for key in ("%}", "#}") for prefix in ("", "+", "-") }, + "{{+": TokenType.BLOCK_START, + "{{-": TokenType.BLOCK_START, "+}}": TokenType.BLOCK_END, "-}}": TokenType.BLOCK_END, "/*+": TokenType.HINT, @@ -486,11 +486,9 @@ class Tokenizer(metaclass=_Tokenizer): "CASE": TokenType.CASE, "CASCADE": TokenType.CASCADE, "CHARACTER SET": TokenType.CHARACTER_SET, - "CHECK": TokenType.CHECK, "CLUSTER BY": TokenType.CLUSTER_BY, "COLLATE": TokenType.COLLATE, "COLUMN": TokenType.COLUMN, - "COMMENT": TokenType.SCHEMA_COMMENT, "COMMIT": TokenType.COMMIT, "COMPOUND": TokenType.COMPOUND, "CONSTRAINT": TokenType.CONSTRAINT, @@ -526,12 +524,10 @@ class Tokenizer(metaclass=_Tokenizer): "FOREIGN KEY": TokenType.FOREIGN_KEY, "FORMAT": TokenType.FORMAT, "FROM": TokenType.FROM, - "GENERATED": TokenType.GENERATED, "GLOB": TokenType.GLOB, "GROUP BY": TokenType.GROUP_BY, "GROUPING SETS": TokenType.GROUPING_SETS, "HAVING": TokenType.HAVING, - "IDENTITY": TokenType.IDENTITY, "IF": TokenType.IF, "ILIKE": TokenType.ILIKE, "IGNORE NULLS": TokenType.IGNORE_NULLS, @@ -747,11 +743,9 @@ class Tokenizer(metaclass=_Tokenizer): "_prev_token_line", "_prev_token_comments", "_prev_token_type", - "_replace_backslash", ) def __init__(self) -> None: - self._replace_backslash = "\\" in self._STRING_ESCAPES self.reset() def reset(self) -> None: @@ -855,7 +849,7 @@ class Tokenizer(metaclass=_Tokenizer): def _scan_keywords(self) -> None: size = 0 word = None - chars = self._text + chars: t.Optional[str] = self._text char = chars prev_space = False skip = False @@ -887,7 +881,7 @@ class Tokenizer(metaclass=_Tokenizer): else: skip = True else: - chars = None # type: ignore + chars = None if not word: if self._char in self.SINGLE_TOKENS: @@ -1015,7 +1009,6 @@ class Tokenizer(metaclass=_Tokenizer): self._advance(len(quote)) text = self._extract_string(quote_end) text = text.encode(self.ENCODE).decode(self.ENCODE) if self.ENCODE else text # type: ignore - text = text.replace("\\\\", "\\") if self._replace_backslash else text self._add(TokenType.NATIONAL if quote[0].upper() == "N" else TokenType.STRING, text) return True @@ -1091,13 +1084,18 @@ class Tokenizer(metaclass=_Tokenizer): delim_size = len(delimiter) while True: - if ( - self._char in self._STRING_ESCAPES - and self._peek - and (self._peek == delimiter or self._peek in self._STRING_ESCAPES) + if self._char in self._STRING_ESCAPES and ( + self._peek == delimiter or self._peek in self._STRING_ESCAPES ): - text += self._peek - self._advance(2) + if self._peek == delimiter: + text += self._peek # type: ignore + else: + text += self._char + self._peek # type: ignore + + if self._current + 1 < self.size: + self._advance(2) + else: + raise RuntimeError(f"Missing {delimiter} from {self._line}:{self._current}") else: if self._chars(delim_size) == delimiter: if delim_size > 1: |