diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-10 06:44:54 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-10 06:44:54 +0000 |
commit | d2e9401b18925b5702c5c758af7d4f5b61deb493 (patch) | |
tree | 58dbf490c0457c2908751b3e4b63af13287381ee /sqlglot/parser.py | |
parent | Adding upstream version 11.7.1. (diff) | |
download | sqlglot-d2e9401b18925b5702c5c758af7d4f5b61deb493.tar.xz sqlglot-d2e9401b18925b5702c5c758af7d4f5b61deb493.zip |
Adding upstream version 12.2.0. (tag: upstream/12.2.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 121 |
1 files changed, 65 insertions, 56 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index abb23ad..d8d9f88 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -126,9 +126,17 @@ class Parser(metaclass=_Parser): TokenType.BIT, TokenType.BOOLEAN, TokenType.TINYINT, + TokenType.UTINYINT, TokenType.SMALLINT, + TokenType.USMALLINT, TokenType.INT, + TokenType.UINT, TokenType.BIGINT, + TokenType.UBIGINT, + TokenType.INT128, + TokenType.UINT128, + TokenType.INT256, + TokenType.UINT256, TokenType.FLOAT, TokenType.DOUBLE, TokenType.CHAR, @@ -961,14 +969,15 @@ class Parser(metaclass=_Parser): The target expression. """ instance = exp_class(**kwargs) - if self._prev_comments: - instance.comments = self._prev_comments - self._prev_comments = None - if comments: - instance.comments = comments + instance.add_comments(comments) if comments else self._add_comments(instance) self.validate_expression(instance) return instance + def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: + if expression and self._prev_comments: + expression.add_comments(self._prev_comments) + self._prev_comments = None + def validate_expression( self, expression: exp.Expression, args: t.Optional[t.List] = None ) -> None: @@ -1567,7 +1576,7 @@ class Parser(metaclass=_Parser): value = self.expression( exp.Schema, this="TABLE", - expressions=self._parse_csv(self._parse_struct_kwargs), + expressions=self._parse_csv(self._parse_struct_types), ) if not self._match(TokenType.GT): self.raise_error("Expecting >") @@ -1802,14 +1811,15 @@ class Parser(metaclass=_Parser): elif self._match(TokenType.SELECT): comments = self._prev_comments + hint = self._parse_hint() + all_ = self._match(TokenType.ALL) + distinct = self._match(TokenType.DISTINCT) + kind = ( self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) and self._prev.text ) - hint = self._parse_hint() - all_ = self._match(TokenType.ALL) - distinct = self._match(TokenType.DISTINCT) if distinct: distinct = self.expression( @@ -2284,7 +2294,7 @@ class 
Parser(metaclass=_Parser): if not self._match(TokenType.UNNEST): return None - expressions = self._parse_wrapped_csv(self._parse_column) + expressions = self._parse_wrapped_csv(self._parse_type) ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) alias = self._parse_table_alias() @@ -2333,7 +2343,9 @@ class Parser(metaclass=_Parser): size = None seed = None - kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" + kind = ( + self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" + ) method = self._parse_var(tokens=(TokenType.ROW,)) self._match(TokenType.L_PAREN) @@ -2684,7 +2696,7 @@ class Parser(metaclass=_Parser): else: this = self.expression(exp.In, this=this, expressions=expressions) - self._match_r_paren() + self._match_r_paren(this) else: this = self.expression(exp.In, this=this, field=self._parse_field()) @@ -2798,7 +2810,7 @@ class Parser(metaclass=_Parser): if self._match(TokenType.L_PAREN): if is_struct: - expressions = self._parse_csv(self._parse_struct_kwargs) + expressions = self._parse_csv(self._parse_struct_types) elif nested: expressions = self._parse_csv(self._parse_types) else: @@ -2833,7 +2845,7 @@ class Parser(metaclass=_Parser): values: t.Optional[t.List[t.Optional[exp.Expression]]] = None if nested and self._match(TokenType.LT): if is_struct: - expressions = self._parse_csv(self._parse_struct_kwargs) + expressions = self._parse_csv(self._parse_struct_types) else: expressions = self._parse_csv(self._parse_types) @@ -2891,16 +2903,10 @@ class Parser(metaclass=_Parser): prefix=prefix, ) - def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: - index = self._index - this = self._parse_id_var() + def _parse_struct_types(self) -> t.Optional[exp.Expression]: + this = self._parse_type() or self._parse_id_var() self._match(TokenType.COLON) - data_type = self._parse_types() - - if not data_type: - self._retreat(index) - return self._parse_types() - return 
self.expression(exp.StructKwarg, this=this, expression=data_type) + return self._parse_column_def(this) def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: if not self._match(TokenType.AT_TIME_ZONE): @@ -2932,7 +2938,11 @@ class Parser(metaclass=_Parser): else exp.Literal.string(value) ) else: - field = self._parse_star() or self._parse_function() or self._parse_id_var() + field = ( + self._parse_star() + or self._parse_function(anonymous=True) + or self._parse_id_var() + ) if isinstance(field, exp.Func): # bigquery allows function calls like x.y.count(...) @@ -2995,11 +3005,9 @@ class Parser(metaclass=_Parser): else: this = self.expression(exp.Paren, this=self._parse_set_operations(this)) - self._match_r_paren() - comments.extend(self._prev_comments) - - if this and comments: - this.comments = comments + if this: + this.add_comments(comments) + self._match_r_paren(expression=this) return this @@ -3017,7 +3025,7 @@ class Parser(metaclass=_Parser): ) def _parse_function( - self, functions: t.Optional[t.Dict[str, t.Callable]] = None + self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False ) -> t.Optional[exp.Expression]: if not self._curr: return None @@ -3043,7 +3051,7 @@ class Parser(metaclass=_Parser): parser = self.FUNCTION_PARSERS.get(upper) - if parser: + if parser and not anonymous: this = parser(self) else: subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) @@ -3059,7 +3067,7 @@ class Parser(metaclass=_Parser): function = functions.get(upper) args = self._parse_csv(self._parse_lambda) - if function: + if function and not anonymous: # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
if count_params(function) == 2: @@ -3148,12 +3156,7 @@ class Parser(metaclass=_Parser): if isinstance(left, exp.Column): left.replace(exp.Var(this=left.text("this"))) - if self._match(TokenType.IGNORE_NULLS): - this = self.expression(exp.IgnoreNulls, this=this) - else: - self._match(TokenType.RESPECT_NULLS) - - return self._parse_limit(self._parse_order(this)) + return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: index = self._index @@ -3177,6 +3180,9 @@ class Parser(metaclass=_Parser): return self.expression(exp.Schema, this=this, expressions=args) def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + # column defs are not really columns, they're identifiers + if isinstance(this, exp.Column): + this = this.this kind = self._parse_types() if self._match_text_seq("FOR", "ORDINALITY"): @@ -3420,7 +3426,7 @@ class Parser(metaclass=_Parser): elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: self.raise_error("Expected }") - this.comments = self._prev_comments + self._add_comments(this) return self._parse_bracket(this) def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: @@ -3584,7 +3590,9 @@ class Parser(metaclass=_Parser): exp.and_( exp.Is(this=expression.copy(), expression=exp.Null()), exp.Is(this=search.copy(), expression=exp.Null()), + copy=False, ), + copy=False, ) ifs.append(exp.If(this=cond, true=result)) @@ -3717,15 +3725,15 @@ class Parser(metaclass=_Parser): if self._match_set(self.TRIM_TYPES): position = self._prev.text.upper() - expression = self._parse_term() + expression = self._parse_bitwise() if self._match_set((TokenType.FROM, TokenType.COMMA)): - this = self._parse_term() + this = self._parse_bitwise() else: this = expression expression = None if self._match(TokenType.COLLATE): - collation = self._parse_term() + 
collation = self._parse_bitwise() return self.expression( exp.Trim, @@ -3741,6 +3749,15 @@ class Parser(metaclass=_Parser): def _parse_named_window(self) -> t.Optional[exp.Expression]: return self._parse_window(self._parse_id_var(), alias=True) + def _parse_respect_or_ignore_nulls( + self, this: t.Optional[exp.Expression] + ) -> t.Optional[exp.Expression]: + if self._match(TokenType.IGNORE_NULLS): + return self.expression(exp.IgnoreNulls, this=this) + if self._match(TokenType.RESPECT_NULLS): + return self.expression(exp.RespectNulls, this=this) + return this + def _parse_window( self, this: t.Optional[exp.Expression], alias: bool = False ) -> t.Optional[exp.Expression]: @@ -3768,10 +3785,7 @@ class Parser(metaclass=_Parser): # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) # and Snowflake chose to do the same for familiarity # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes - if self._match(TokenType.IGNORE_NULLS): - this = self.expression(exp.IgnoreNulls, this=this) - elif self._match(TokenType.RESPECT_NULLS): - this = self.expression(exp.RespectNulls, this=this) + this = self._parse_respect_or_ignore_nulls(this) # bigquery select from window x AS (partition by ...) 
if alias: @@ -3975,9 +3989,7 @@ class Parser(metaclass=_Parser): items = [parse_result] if parse_result is not None else [] while self._match(sep): - if parse_result and self._prev_comments: - parse_result.comments = self._prev_comments - + self._add_comments(parse_result) parse_result = parse_method() if parse_result is not None: items.append(parse_result) @@ -4345,13 +4357,14 @@ class Parser(metaclass=_Parser): self._retreat(index) return None - def _match(self, token_type, advance=True): + def _match(self, token_type, advance=True, expression=None): if not self._curr: return None if self._curr.token_type == token_type: if advance: self._advance() + self._add_comments(expression) return True return None @@ -4379,16 +4392,12 @@ class Parser(metaclass=_Parser): return None def _match_l_paren(self, expression=None): - if not self._match(TokenType.L_PAREN): + if not self._match(TokenType.L_PAREN, expression=expression): self.raise_error("Expecting (") - if expression and self._prev_comments: - expression.comments = self._prev_comments def _match_r_paren(self, expression=None): - if not self._match(TokenType.R_PAREN): + if not self._match(TokenType.R_PAREN, expression=expression): self.raise_error("Expecting )") - if expression and self._prev_comments: - expression.comments = self._prev_comments def _match_texts(self, texts, advance=True): if self._curr and self._curr.text.upper() in texts: |