author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-06-11 16:34:56 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-06-11 16:34:56 +0000
commit     278f416d08028bd175e1d6433739461f2168f4e2 (patch)
tree       12492ebc5907744b2a4297228324fcda9ee2e40f /sqlglot/parser.py
parent     Adding upstream version 24.1.0. (diff)
download   sqlglot-278f416d08028bd175e1d6433739461f2168f4e2.tar.xz
           sqlglot-278f416d08028bd175e1d6433739461f2168f4e2.zip
Adding upstream version 25.0.3. (upstream/25.0.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--  sqlglot/parser.py  201
1 file changed, 129 insertions, 72 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index c2cb3a1..ed53b19 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -193,6 +193,7 @@ class Parser(metaclass=_Parser):
 
     NESTED_TYPE_TOKENS = {
         TokenType.ARRAY,
+        TokenType.LIST,
         TokenType.LOWCARDINALITY,
         TokenType.MAP,
         TokenType.NULLABLE,
@@ -456,6 +457,11 @@ class Parser(metaclass=_Parser):
 
     ALIAS_TOKENS = ID_VAR_TOKENS
 
+    ARRAY_CONSTRUCTORS = {
+        "ARRAY": exp.Array,
+        "LIST": exp.List,
+    }
+
     COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 
     UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
@@ -504,8 +510,15 @@ class Parser(metaclass=_Parser):
         *SUBQUERY_PREDICATES,
     }
 
-    CONJUNCTION = {
+    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
         TokenType.AND: exp.And,
+    }
+
+    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
+        TokenType.COLON_EQ: exp.PropertyEQ,
+    }
+
+    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
         TokenType.OR: exp.Or,
     }
 
@@ -588,7 +601,7 @@ class Parser(metaclass=_Parser):
         TokenType.ARROW: lambda self, expressions: self.expression(
             exp.Lambda,
             this=self._replace_lambda(
-                self._parse_conjunction(),
+                self._parse_assignment(),
                 expressions,
             ),
             expressions=expressions,
@@ -596,7 +609,7 @@ class Parser(metaclass=_Parser):
         TokenType.FARROW: lambda self, expressions: self.expression(
             exp.Kwarg,
             this=exp.var(expressions[0].name),
-            expression=self._parse_conjunction(),
+            expression=self._parse_assignment(),
         ),
     }
 
@@ -639,7 +652,7 @@ class Parser(metaclass=_Parser):
     EXPRESSION_PARSERS = {
         exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
         exp.Column: lambda self: self._parse_column(),
-        exp.Condition: lambda self: self._parse_conjunction(),
+        exp.Condition: lambda self: self._parse_assignment(),
         exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
         exp.Expression: lambda self: self._parse_expression(),
         exp.From: lambda self: self._parse_from(joins=True),
@@ -890,11 +903,11 @@ class Parser(metaclass=_Parser):
         ),
         "CHECK": lambda self: self.expression(
             exp.CheckColumnConstraint,
-            this=self._parse_wrapped(self._parse_conjunction),
+            this=self._parse_wrapped(self._parse_assignment),
             enforced=self._match_text_seq("ENFORCED"),
         ),
         "COLLATE": lambda self: self.expression(
-            exp.CollateColumnConstraint, this=self._parse_var()
+            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
         ),
         "COMMENT": lambda self: self.expression(
             exp.CommentColumnConstraint, this=self._parse_string()
@@ -994,6 +1007,7 @@ class Parser(metaclass=_Parser):
         "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
         "DECODE": lambda self: self._parse_decode(),
         "EXTRACT": lambda self: self._parse_extract(),
+        "GAP_FILL": lambda self: self._parse_gap_fill(),
         "JSON_OBJECT": lambda self: self._parse_json_object(),
         "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
         "JSON_TABLE": lambda self: self._parse_json_table(),
@@ -2191,7 +2205,7 @@ class Parser(metaclass=_Parser):
 
     def _parse_partition_by(self) -> t.List[exp.Expression]:
         if self._match(TokenType.PARTITION_BY):
-            return self._parse_csv(self._parse_conjunction)
+            return self._parse_csv(self._parse_assignment)
         return []
 
     def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
@@ -2408,8 +2422,7 @@ class Parser(metaclass=_Parser):
             stored=self._match_text_seq("STORED") and self._parse_stored(),
             by_name=self._match_text_seq("BY", "NAME"),
             exists=self._parse_exists(),
-            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
-            and self._parse_conjunction(),
+            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
             expression=self._parse_derived_table_values() or self._parse_ddl_select(),
             conflict=self._parse_on_conflict(),
             returning=returning or self._parse_returning(),
@@ -2619,7 +2632,7 @@ class Parser(metaclass=_Parser):
             return None
 
         return self.expression(
-            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
+            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
         )
 
     def _parse_value(self) -> t.Optional[exp.Tuple]:
@@ -3115,7 +3128,7 @@ class Parser(metaclass=_Parser):
             kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)
 
         if self._match(TokenType.ON):
-            kwargs["on"] = self._parse_conjunction()
+            kwargs["on"] = self._parse_assignment()
         elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
         elif not isinstance(kwargs["this"], exp.Unnest) and not (
@@ -3125,7 +3138,7 @@ class Parser(metaclass=_Parser):
            joins: t.Optional[list] = list(self._parse_joins())
 
            if joins and self._match(TokenType.ON):
-                kwargs["on"] = self._parse_conjunction()
+                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
@@ -3138,7 +3151,7 @@ class Parser(metaclass=_Parser):
         return self.expression(exp.Join, comments=comments, **kwargs)
 
     def _parse_opclass(self) -> t.Optional[exp.Expression]:
-        this = self._parse_conjunction()
+        this = self._parse_assignment()
         if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
             return this
 
@@ -3554,7 +3567,7 @@ class Parser(metaclass=_Parser):
 
     def _parse_pivot_in(self) -> exp.In:
         def _parse_aliased_expression() -> t.Optional[exp.Expression]:
-            this = self._parse_conjunction()
+            this = self._parse_assignment()
 
             self._match(TokenType.ALIAS)
             alias = self._parse_field()
@@ -3648,7 +3661,7 @@ class Parser(metaclass=_Parser):
             return None
 
         return self.expression(
-            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
+            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
         )
 
     def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
@@ -3656,7 +3669,7 @@ class Parser(metaclass=_Parser):
             return None
 
         return self.expression(
-            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
+            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
         )
 
     def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
@@ -3674,7 +3687,7 @@ class Parser(metaclass=_Parser):
             expressions = self._parse_csv(
                 lambda: None
                 if self._match(TokenType.ROLLUP, advance=False)
-                else self._parse_conjunction()
+                else self._parse_assignment()
             )
             if expressions:
                 elements["expressions"].extend(expressions)
@@ -3725,18 +3738,18 @@ class Parser(metaclass=_Parser):
     def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
         if not skip_having_token and not self._match(TokenType.HAVING):
             return None
-        return self.expression(exp.Having, this=self._parse_conjunction())
+        return self.expression(exp.Having, this=self._parse_assignment())
 
     def _parse_qualify(self) -> t.Optional[exp.Qualify]:
         if not self._match(TokenType.QUALIFY):
             return None
-        return self.expression(exp.Qualify, this=self._parse_conjunction())
+        return self.expression(exp.Qualify, this=self._parse_assignment())
 
     def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
         if skip_start_token:
             start = None
         elif self._match(TokenType.START_WITH):
-            start = self._parse_conjunction()
+            start = self._parse_assignment()
         else:
             return None
 
@@ -3745,11 +3758,11 @@ class Parser(metaclass=_Parser):
         self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
             exp.Prior, this=self._parse_bitwise()
         )
-        connect = self._parse_conjunction()
+        connect = self._parse_assignment()
         self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
 
         if not start and self._match(TokenType.START_WITH):
-            start = self._parse_conjunction()
+            start = self._parse_assignment()
 
         return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
 
@@ -3757,7 +3770,7 @@ class Parser(metaclass=_Parser):
         return self.expression(
             exp.Alias,
             alias=self._parse_id_var(any_token=True),
-            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
+            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
         )
 
     def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
@@ -3791,7 +3804,7 @@ class Parser(metaclass=_Parser):
     def _parse_ordered(
         self, parse_method: t.Optional[t.Callable] = None
     ) -> t.Optional[exp.Ordered]:
-        this = parse_method() if parse_method else self._parse_conjunction()
+        this = parse_method() if parse_method else self._parse_assignment()
 
         if not this:
             return None
@@ -3970,28 +3983,27 @@ class Parser(metaclass=_Parser):
         return this
 
     def _parse_expression(self) -> t.Optional[exp.Expression]:
-        return self._parse_alias(self._parse_conjunction())
+        return self._parse_alias(self._parse_assignment())
 
-    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
-        this = self._parse_equality()
+    def _parse_assignment(self) -> t.Optional[exp.Expression]:
+        this = self._parse_disjunction()
 
-        if self._match(TokenType.COLON_EQ):
+        while self._match_set(self.ASSIGNMENT):
             this = self.expression(
-                exp.PropertyEQ,
+                self.ASSIGNMENT[self._prev.token_type],
                 this=this,
                 comments=self._prev_comments,
-                expression=self._parse_conjunction(),
+                expression=self._parse_assignment(),
             )
 
-        while self._match_set(self.CONJUNCTION):
-            this = self.expression(
-                self.CONJUNCTION[self._prev.token_type],
-                this=this,
-                comments=self._prev_comments,
-                expression=self._parse_equality(),
-            )
         return this
 
+    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
+        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)
+
+    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
+        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
+
     def _parse_equality(self) -> t.Optional[exp.Expression]:
         return self._parse_tokens(self._parse_comparison, self.EQUALITY)
 
@@ -4172,12 +4184,16 @@ class Parser(metaclass=_Parser):
         this = parse_method()
 
         while self._match_set(self.FACTOR):
-            this = self.expression(
-                self.FACTOR[self._prev.token_type],
-                this=this,
-                comments=self._prev_comments,
-                expression=parse_method(),
-            )
+            klass = self.FACTOR[self._prev.token_type]
+            comments = self._prev_comments
+            expression = parse_method()
+
+            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
+                self._retreat(self._index - 1)
+                return this
+
+            this = self.expression(klass, this=this, comments=comments, expression=expression)
+
             if isinstance(this, exp.Div):
                 this.args["typed"] = self.dialect.TYPED_DIVISION
                 this.args["safe"] = self.dialect.SAFE_DIVISION
@@ -4291,6 +4307,29 @@ class Parser(metaclass=_Parser):
         if type_token == TokenType.OBJECT_IDENTIFIER:
             return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())
 
+        # https://materialize.com/docs/sql/types/map/
+        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
+            key_type = self._parse_types(
+                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
+            )
+            if not self._match(TokenType.FARROW):
+                self._retreat(index)
+                return None
+
+            value_type = self._parse_types(
+                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
+            )
+            if not self._match(TokenType.R_BRACKET):
+                self._retreat(index)
+                return None
+
+            return exp.DataType(
+                this=exp.DataType.Type.MAP,
+                expressions=[key_type, value_type],
+                nested=True,
+                prefix=prefix,
+            )
+
         nested = type_token in self.NESTED_TYPE_TOKENS
         is_struct = type_token in self.STRUCT_TYPE_TOKENS
         is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
@@ -4345,7 +4384,7 @@ class Parser(metaclass=_Parser):
                 self.raise_error("Expecting >")
 
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
-                values = self._parse_csv(self._parse_conjunction)
+                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))
 
         if type_token in self.TIMESTAMPS:
@@ -4400,6 +4439,10 @@ class Parser(metaclass=_Parser):
         elif expressions:
             this.set("expressions", expressions)
 
+        # https://materialize.com/docs/sql/types/list/#type-name
+        while self._match(TokenType.LIST):
+            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)
+
         index = self._index
 
         # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
@@ -4411,7 +4454,7 @@ class Parser(metaclass=_Parser):
                 break
 
            matched_array = False
-            values = self._parse_csv(self._parse_conjunction) or None
+            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                self._retreat(index)
                break
@@ -4818,7 +4861,7 @@ class Parser(metaclass=_Parser):
 
         if self._match(TokenType.DISTINCT):
             this = self.expression(
-                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
+                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
             )
         else:
             this = self._parse_select_or_expression(alias=alias)
@@ -4863,7 +4906,7 @@ class Parser(metaclass=_Parser):
             constraints.append(
                 self.expression(
                     exp.ComputedColumnConstraint,
-                    this=self._parse_conjunction(),
+                    this=self._parse_assignment(),
                     persisted=persisted or self._match_text_seq("PERSISTED"),
                     not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                 )
@@ -5153,7 +5196,7 @@ class Parser(metaclass=_Parser):
         return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
 
     def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
-        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))
+        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
 
     def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
         if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
@@ -5172,9 +5215,13 @@ class Parser(metaclass=_Parser):
         # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
         if bracket_kind == TokenType.L_BRACE:
             this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
-        elif not this or this.name.upper() == "ARRAY":
+        elif not this:
             this = self.expression(exp.Array, expressions=expressions)
         else:
+            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
+            if constructor_type:
+                return self.expression(constructor_type, expressions=expressions)
+
             expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
             this = self.expression(exp.Bracket, this=this, expressions=expressions)
 
@@ -5183,7 +5230,7 @@ class Parser(metaclass=_Parser):
 
     def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
         if self._match(TokenType.COLON):
-            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
+            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
         return this
 
     def _parse_case(self) -> t.Optional[exp.Expression]:
@@ -5191,16 +5238,16 @@ class Parser(metaclass=_Parser):
         default = None
         comments = self._prev_comments
 
-        expression = self._parse_conjunction()
+        expression = self._parse_assignment()
 
         while self._match(TokenType.WHEN):
-            this = self._parse_conjunction()
+            this = self._parse_assignment()
             self._match(TokenType.THEN)
-            then = self._parse_conjunction()
+            then = self._parse_assignment()
             ifs.append(self.expression(exp.If, this=this, true=then))
 
         if self._match(TokenType.ELSE):
-            default = self._parse_conjunction()
+            default = self._parse_assignment()
 
         if not self._match(TokenType.END):
             if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
@@ -5214,7 +5261,7 @@ class Parser(metaclass=_Parser):
 
     def _parse_if(self) -> t.Optional[exp.Expression]:
         if self._match(TokenType.L_PAREN):
-            args = self._parse_csv(self._parse_conjunction)
+            args = self._parse_csv(self._parse_assignment)
             this = self.validate_expression(exp.If.from_arg_list(args), args)
             self._match_r_paren()
         else:
@@ -5223,15 +5270,15 @@ class Parser(metaclass=_Parser):
             if self.NO_PAREN_IF_COMMANDS and index == 0:
                 return self._parse_as_command(self._prev)
 
-            condition = self._parse_conjunction()
+            condition = self._parse_assignment()
 
             if not condition:
                 self._retreat(index)
                 return None
 
             self._match(TokenType.THEN)
-            true = self._parse_conjunction()
-            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
+            true = self._parse_assignment()
+            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
             self._match(TokenType.END)
             this = self.expression(exp.If, this=condition, true=true, false=false)
 
@@ -5259,8 +5306,18 @@ class Parser(metaclass=_Parser):
 
         return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
 
+    def _parse_gap_fill(self) -> exp.GapFill:
+        self._match(TokenType.TABLE)
+        this = self._parse_table()
+
+        self._match(TokenType.COMMA)
+        args = [this, *self._parse_csv(self._parse_lambda)]
+
+        gap_fill = exp.GapFill.from_arg_list(args)
+        return self.validate_expression(gap_fill, args)
+
     def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
-        this = self._parse_conjunction()
+        this = self._parse_assignment()
 
         if not self._match(TokenType.ALIAS):
             if self._match(TokenType.COMMA):
@@ -5313,12 +5370,12 @@ class Parser(metaclass=_Parser):
     def _parse_string_agg(self) -> exp.Expression:
         if self._match(TokenType.DISTINCT):
             args: t.List[t.Optional[exp.Expression]] = [
-                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
+                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
             ]
             if self._match(TokenType.COMMA):
-                args.extend(self._parse_csv(self._parse_conjunction))
+                args.extend(self._parse_csv(self._parse_assignment))
         else:
-            args = self._parse_csv(self._parse_conjunction)  # type: ignore
+            args = self._parse_csv(self._parse_assignment)  # type: ignore
 
         index = self._index
         if not self._match(TokenType.R_PAREN) and args:
@@ -5365,7 +5422,7 @@ class Parser(metaclass=_Parser):
         needs special treatment, since we need to explicitly check for it with `IS NULL`,
         instead of relying on pattern matching.
         """
-        args = self._parse_csv(self._parse_conjunction)
+        args = self._parse_csv(self._parse_assignment)
 
         if len(args) < 3:
             return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
@@ -5965,7 +6022,7 @@ class Parser(metaclass=_Parser):
 
     def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
         return self._parse_select() or self._parse_set_operations(
-            self._parse_expression() if alias else self._parse_conjunction()
+            self._parse_expression() if alias else self._parse_assignment()
         )
 
     def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
@@ -6077,7 +6134,7 @@ class Parser(metaclass=_Parser):
         if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
             return self.expression(exp.AlterColumn, this=column, drop=True)
         if self._match_pair(TokenType.SET, TokenType.DEFAULT):
-            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
+            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
         if self._match(TokenType.COMMENT):
             return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
         if self._match_text_seq("DROP", "NOT", "NULL"):
@@ -6100,7 +6157,7 @@ class Parser(metaclass=_Parser):
             this=column,
             dtype=self._parse_types(),
             collate=self._match(TokenType.COLLATE) and self._parse_term(),
-            using=self._match(TokenType.USING) and self._parse_conjunction(),
+            using=self._match(TokenType.USING) and self._parse_assignment(),
         )
 
     def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
@@ -6155,9 +6212,9 @@ class Parser(metaclass=_Parser):
         if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
             "TABLE", "PROPERTIES"
         ):
-            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction))
+            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
         elif self._match_text_seq("FILESTREAM_ON", advance=False):
-            alter_set.set("expressions", [self._parse_conjunction()])
+            alter_set.set("expressions", [self._parse_assignment()])
         elif self._match_texts(("LOGGED", "UNLOGGED")):
             alter_set.set("option", exp.var(self._prev.text.upper()))
         elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
@@ -6175,7 +6232,7 @@ class Parser(metaclass=_Parser):
         elif self._match_text_seq("STAGE_COPY_OPTIONS"):
             alter_set.set("copy_options", self._parse_wrapped_options())
         elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
-            alter_set.set("tag", self._parse_csv(self._parse_conjunction))
+            alter_set.set("tag", self._parse_csv(self._parse_assignment))
         else:
             if self._match_text_seq("SERDE"):
                 alter_set.set("serde", self._parse_field())
@@ -6227,7 +6284,7 @@ class Parser(metaclass=_Parser):
         using = self._parse_table()
 
         self._match(TokenType.ON)
-        on = self._parse_conjunction()
+        on = self._parse_assignment()
 
         return self.expression(
             exp.Merge,
@@ -6248,7 +6305,7 @@ class Parser(metaclass=_Parser):
             if self._match_text_seq("BY", "TARGET")
             else self._match_text_seq("BY", "SOURCE")
         )
-        condition = self._parse_conjunction() if self._match(TokenType.AND) else None
+        condition = self._parse_assignment() if self._match(TokenType.AND) else None
 
         self._match(TokenType.THEN)
 
@@ -6428,7 +6485,7 @@ class Parser(metaclass=_Parser):
             self._retreat(index - 1)
             return None
         iterator = self._parse_column()
-        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
+        condition = self._parse_assignment() if self._match_text_seq("IF") else None
         return self.expression(
             exp.Comprehension,
             this=this,