From c03ba18c491e52cc85d8aae1825dd9e0b4f75e32 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 26 Oct 2023 19:21:54 +0200 Subject: Merging upstream version 18.17.0. Signed-off-by: Daniel Baumann --- sqlglot/parser.py | 66 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 14 deletions(-) (limited to 'sqlglot/parser.py') diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 8de76ca..b7f91ab 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -313,6 +313,7 @@ class Parser(metaclass=_Parser): TokenType.UNIQUE, TokenType.UNPIVOT, TokenType.UPDATE, + TokenType.USE, TokenType.VOLATILE, TokenType.WINDOW, *CREATABLES, @@ -629,11 +630,14 @@ class Parser(metaclass=_Parser): "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), - "CHARACTER SET": lambda self: self._parse_character_set(), + "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), + "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), "CHECKSUM": lambda self: self._parse_checksum(), "CLUSTER BY": lambda self: self._parse_cluster(), "CLUSTERED": lambda self: self._parse_clustered_by(), - "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), + "COLLATE": lambda self, **kwargs: self._parse_property_assignment( + exp.CollateProperty, **kwargs + ), "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), "COPY": lambda self: self._parse_copy_property(), "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), @@ -1443,8 +1447,8 @@ class Parser(metaclass=_Parser): if self._match_texts(self.PROPERTY_PARSERS): return self.PROPERTY_PARSERS[self._prev.text.upper()](self) - if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): - return self._parse_character_set(default=True) + if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): + return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) if self._match_text_seq("COMPOUND", "SORTKEY"): return self._parse_sortkey(compound=True) @@ -1480,10 +1484,10 @@ class Parser(metaclass=_Parser): else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), ) - def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: + def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: self._match(TokenType.EQ) self._match(TokenType.ALIAS) - return self.expression(exp_class, this=self._parse_field()) + return self.expression(exp_class, this=self._parse_field(), **kwargs) def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: properties = [] @@ -2426,9 +2430,9 @@ class Parser(metaclass=_Parser): table_alias: t.Optional[exp.TableAlias] = self.expression( exp.TableAlias, this=table, columns=columns ) - elif isinstance(this, exp.Subquery) and this.alias: - # Ensures parity between the Subquery's and the Lateral's "alias" args - table_alias = this.args["alias"].copy() + elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: + # We move the alias from the lateral's child node to the lateral itself + table_alias = this.args["alias"].pop() else: table_alias = self._parse_table_alias() @@ -2952,6 +2956,7 @@ class Parser(metaclass=_Parser): cube = None totals = None + index = self._index with_ = self._match(TokenType.WITH) if self._match(TokenType.ROLLUP): rollup = with_ or self._parse_wrapped_csv(self._parse_column) @@ -2966,6 +2971,8 @@ class Parser(metaclass=_Parser): elements["totals"] = True # type: ignore if not (grouping_sets or rollup or cube or totals): + if with_: + self._retreat(index) break return self.expression(exp.Group, **elements) # type: ignore @@ -3157,6 +3164,7 @@ class Parser(metaclass=_Parser): return self.expression( expression, + comments=self._prev.comments, this=this, distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), by_name=self._match_text_seq("BY", "NAME"), @@ -3618,6 +3626,32 @@ class Parser(metaclass=_Parser): functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False, optional_parens: bool = True, + ) -> t.Optional[exp.Expression]: + # This allows us to also parse {fn } syntax (Snowflake, MySQL support this) + # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences + fn_syntax = False + if ( + self._match(TokenType.L_BRACE, advance=False) + and self._next + and self._next.text.upper() == "FN" + ): + self._advance(2) + fn_syntax = True + + func = self._parse_function_call( + functions=functions, anonymous=anonymous, optional_parens=optional_parens + ) + + if fn_syntax: + self._match(TokenType.R_BRACE) + + return func + + def _parse_function_call( + self, + functions: t.Optional[t.Dict[str, t.Callable]] = None, + anonymous: bool = False, + optional_parens: bool = True, ) -> t.Optional[exp.Expression]: if not self._curr: return None @@ -3856,6 +3890,10 @@ class Parser(metaclass=_Parser): if not identity: this.set("expression", self._parse_bitwise()) + elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): + args = self._parse_csv(self._parse_bitwise) + this.set("start", seq_get(args, 0)) + this.set("increment", seq_get(args, 1)) self._match_r_paren() @@ -4039,6 +4077,11 @@ class Parser(metaclass=_Parser): ) ) + if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: + self.raise_error("Expected ]") + elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: + self.raise_error("Expected }") + # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs if bracket_kind == TokenType.L_BRACE: this = self.expression(exp.Struct, expressions=expressions) @@ -4048,11 +4091,6 @@ class Parser(metaclass=_Parser): expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) this = self.expression(exp.Bracket, this=this, expressions=expressions) - if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: - self.raise_error("Expected ]") - elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: - self.raise_error("Expected }") - self._add_comments(this) return self._parse_bracket(this) -- cgit v1.2.3