From ebec59cc5cb6c6856705bf82ced7fe8d9f75b0d0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 7 Mar 2023 19:09:31 +0100 Subject: Merging upstream version 11.3.0. Signed-off-by: Daniel Baumann --- sqlglot/parser.py | 179 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 113 insertions(+), 66 deletions(-) (limited to 'sqlglot/parser.py') diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 9f32765..f39bb39 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -36,6 +36,10 @@ class _Parser(type): klass = super().__new__(cls, clsname, bases, attrs) klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) + + if not klass.INTEGER_DIVISION: + klass.FACTOR = {**klass.FACTOR, TokenType.SLASH: exp.FloatDiv} + return klass @@ -157,6 +161,21 @@ class Parser(metaclass=_Parser): RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} + DB_CREATABLES = { + TokenType.DATABASE, + TokenType.SCHEMA, + TokenType.TABLE, + TokenType.VIEW, + } + + CREATABLES = { + TokenType.COLUMN, + TokenType.FUNCTION, + TokenType.INDEX, + TokenType.PROCEDURE, + *DB_CREATABLES, + } + ID_VAR_TOKENS = { TokenType.VAR, TokenType.ANTI, @@ -168,8 +187,8 @@ class Parser(metaclass=_Parser): TokenType.CACHE, TokenType.CASCADE, TokenType.COLLATE, - TokenType.COLUMN, TokenType.COMMAND, + TokenType.COMMENT, TokenType.COMMIT, TokenType.COMPOUND, TokenType.CONSTRAINT, @@ -186,9 +205,7 @@ class Parser(metaclass=_Parser): TokenType.FILTER, TokenType.FOLLOWING, TokenType.FORMAT, - TokenType.FUNCTION, TokenType.IF, - TokenType.INDEX, TokenType.ISNULL, TokenType.INTERVAL, TokenType.LAZY, @@ -211,13 +228,11 @@ class Parser(metaclass=_Parser): TokenType.RIGHT, TokenType.ROW, TokenType.ROWS, - TokenType.SCHEMA, TokenType.SEED, TokenType.SEMI, TokenType.SET, TokenType.SHOW, TokenType.SORTKEY, - TokenType.TABLE, TokenType.TEMPORARY, TokenType.TOP, TokenType.TRAILING, @@ -226,10 +241,9 @@ class Parser(metaclass=_Parser): TokenType.UNIQUE, TokenType.UNLOGGED, TokenType.UNPIVOT, - TokenType.PROCEDURE, - TokenType.VIEW, TokenType.VOLATILE, TokenType.WINDOW, + *CREATABLES, *SUBQUERY_PREDICATES, *TYPE_TOKENS, *NO_PAREN_FUNCTIONS, @@ -428,6 +442,7 @@ class Parser(metaclass=_Parser): TokenType.BEGIN: lambda self: self._parse_transaction(), TokenType.CACHE: lambda self: self._parse_cache(), TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), + TokenType.COMMENT: lambda self: self._parse_comment(), TokenType.CREATE: lambda self: self._parse_create(), TokenType.DELETE: lambda self: self._parse_delete(), TokenType.DESC: lambda self: self._parse_describe(), @@ -490,6 +505,9 @@ class Parser(metaclass=_Parser): TokenType.GLOB: lambda self, this: self._parse_escape( self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) ), + TokenType.OVERLAPS: lambda self, this: self._parse_escape( + self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise()) + ), TokenType.IN: lambda self, this: self._parse_in(this), TokenType.IS: lambda self, this: self._parse_is(this), TokenType.LIKE: lambda self, this: self._parse_escape( @@ -628,6 +646,14 @@ class Parser(metaclass=_Parser): "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), } + ALTER_PARSERS = { + "ADD": lambda self: self._parse_alter_table_add(), + "ALTER": lambda self: self._parse_alter_table_alter(), + "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), + "DROP": lambda self: self._parse_alter_table_drop(), + "RENAME": lambda self: self._parse_alter_table_rename(), + } + SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} NO_PAREN_FUNCTION_PARSERS = { @@ -669,16 +695,6 @@ class Parser(metaclass=_Parser): MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) - CREATABLES = { - TokenType.COLUMN, - TokenType.FUNCTION, - TokenType.INDEX, - TokenType.PROCEDURE, - TokenType.SCHEMA, - TokenType.TABLE, - TokenType.VIEW, - } - TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} @@ -689,6 +705,8 @@ class Parser(metaclass=_Parser): STRICT_CAST = True + INTEGER_DIVISION = True + __slots__ = ( "error_level", "error_message_context", @@ -940,6 +958,32 @@ class Parser(metaclass=_Parser): def _parse_command(self) -> exp.Expression: return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) + def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: + start = self._prev + exists = self._parse_exists() if allow_exists else None + + self._match(TokenType.ON) + + kind = self._match_set(self.CREATABLES) and self._prev + + if not kind: + return self._parse_as_command(start) + + if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): + this = self._parse_user_defined_function(kind=kind.token_type) + elif kind.token_type == TokenType.TABLE: + this = self._parse_table() + elif kind.token_type == TokenType.COLUMN: + this = self._parse_column() + else: + this = self._parse_id_var() + + self._match(TokenType.IS) + + return self.expression( + exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists + ) + def _parse_statement(self) -> t.Optional[exp.Expression]: if self._curr is None: return None @@ -990,6 +1034,7 @@ class Parser(metaclass=_Parser): TokenType.OR, TokenType.REPLACE ) unique = self._match(TokenType.UNIQUE) + volatile = self._match(TokenType.VOLATILE) if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): self._match(TokenType.TABLE) @@ -1028,11 +1073,7 @@ class Parser(metaclass=_Parser): expression = self.expression(exp.Return, this=expression) elif create_token.token_type == TokenType.INDEX: this = self._parse_index() - elif create_token.token_type in ( - TokenType.TABLE, - TokenType.VIEW, - TokenType.SCHEMA, - ): + elif create_token.token_type in self.DB_CREATABLES: table_parts = self._parse_table_parts(schema=True) # exp.Properties.Location.POST_NAME @@ -1100,11 +1141,12 @@ class Parser(metaclass=_Parser): exp.Create, this=this, kind=create_token.text, + replace=replace, unique=unique, + volatile=volatile, expression=expression, exists=exists, properties=properties, - replace=replace, indexes=indexes, no_schema_binding=no_schema_binding, begin=begin, @@ -3648,6 +3690,47 @@ class Parser(metaclass=_Parser): return self.expression(exp.AddConstraint, this=this, expression=expression) + def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: + index = self._index - 1 + + if self._match_set(self.ADD_CONSTRAINT_TOKENS): + return self._parse_csv(self._parse_add_constraint) + + self._retreat(index) + return self._parse_csv(self._parse_add_column) + + def _parse_alter_table_alter(self) -> exp.Expression: + self._match(TokenType.COLUMN) + column = self._parse_field(any_token=True) + + if self._match_pair(TokenType.DROP, TokenType.DEFAULT): + return self.expression(exp.AlterColumn, this=column, drop=True) + if self._match_pair(TokenType.SET, TokenType.DEFAULT): + return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) + + self._match_text_seq("SET", "DATA") + return self.expression( + exp.AlterColumn, + this=column, + dtype=self._match_text_seq("TYPE") and self._parse_types(), + collate=self._match(TokenType.COLLATE) and self._parse_term(), + using=self._match(TokenType.USING) and self._parse_conjunction(), + ) + + def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: + index = self._index - 1 + + partition_exists = self._parse_exists() + if self._match(TokenType.PARTITION, advance=False): + return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) + + self._retreat(index) + return self._parse_csv(self._parse_drop_column) + + def _parse_alter_table_rename(self) -> exp.Expression: + self._match_text_seq("TO") + return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) + def _parse_alter(self) -> t.Optional[exp.Expression]: if not self._match(TokenType.TABLE): return self._parse_as_command(self._prev) @@ -3655,50 +3738,12 @@ class Parser(metaclass=_Parser): exists = self._parse_exists() this = self._parse_table(schema=True) - actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None - - index = self._index - if self._match(TokenType.DELETE): - actions = [self.expression(exp.Delete, where=self._parse_where())] - elif self._match_text_seq("ADD"): - if self._match_set(self.ADD_CONSTRAINT_TOKENS): - actions = self._parse_csv(self._parse_add_constraint) - else: - self._retreat(index) - actions = self._parse_csv(self._parse_add_column) - elif self._match_text_seq("DROP"): - partition_exists = self._parse_exists() + if not self._curr: + return None - if self._match(TokenType.PARTITION, advance=False): - actions = self._parse_csv( - lambda: self._parse_drop_partition(exists=partition_exists) - ) - else: - self._retreat(index) - actions = self._parse_csv(self._parse_drop_column) - elif self._match_text_seq("RENAME", "TO"): - actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) - elif self._match_text_seq("ALTER"): - self._match(TokenType.COLUMN) - column = self._parse_field(any_token=True) - - if self._match_pair(TokenType.DROP, TokenType.DEFAULT): - actions = self.expression(exp.AlterColumn, this=column, drop=True) - elif self._match_pair(TokenType.SET, TokenType.DEFAULT): - actions = self.expression( - exp.AlterColumn, this=column, default=self._parse_conjunction() - ) - else: - self._match_text_seq("SET", "DATA") - actions = self.expression( - exp.AlterColumn, - this=column, - dtype=self._match_text_seq("TYPE") and self._parse_types(), - collate=self._match(TokenType.COLLATE) and self._parse_term(), - using=self._match(TokenType.USING) and self._parse_conjunction(), - ) + parser = self.ALTER_PARSERS.get(self._curr.text.upper()) + actions = ensure_list(self._advance() or parser(self)) if parser else [] # type: ignore - actions = ensure_list(actions) return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions) def _parse_show(self) -> t.Optional[exp.Expression]: @@ -3772,7 +3817,9 @@ class Parser(metaclass=_Parser): def _parse_as_command(self, start: Token) -> exp.Command: while self._curr: self._advance() - return exp.Command(this=self._find_sql(start, self._prev)) + text = self._find_sql(start, self._prev) + size = len(start.text) + return exp.Command(this=text[:size], expression=text[size:]) def _find_parser( self, parsers: t.Dict[str, t.Callable], trie: t.Dict -- cgit v1.2.3