diff options
Diffstat (limited to '')
-rw-r--r-- | sqlglot/parser.py | 129 |
1 file changed, 81 insertions(+), 48 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index c7e27a3..3d01a84 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -568,6 +568,7 @@ class Parser(metaclass=_Parser): exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), exp.Table: lambda self: self._parse_table_parts(), exp.TableAlias: lambda self: self._parse_table_alias(), + exp.When: lambda self: seq_get(self._parse_when_matched(), 0), exp.Where: lambda self: self._parse_where(), exp.Window: lambda self: self._parse_named_window(), exp.With: lambda self: self._parse_with(), @@ -635,6 +636,11 @@ class Parser(metaclass=_Parser): TokenType.HEREDOC_STRING: lambda self, token: self.expression( exp.RawString, this=token.text ), + TokenType.UNICODE_STRING: lambda self, token: self.expression( + exp.UnicodeString, + this=token.text, + escape=self._match_text_seq("UESCAPE") and self._parse_string(), + ), TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), } @@ -907,7 +913,7 @@ class Parser(metaclass=_Parser): INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} CLONE_KEYWORDS = {"CLONE", "COPY"} - CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} + HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} @@ -947,6 +953,10 @@ class Parser(metaclass=_Parser): # Whether the TRIM function expects the characters to trim as its first argument TRIM_PATTERN_FIRST = False + # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) + MODIFIERS_ATTACHED_TO_UNION = True + UNION_MODIFIERS = {"order", "limit", "offset"} + __slots__ = ( "error_level", "error_message_context", @@ -1162,6 +1172,9 @@ class Parser(metaclass=_Parser): def _find_sql(self, start: Token, end: Token) -> str: return self.sql[start.start : end.end + 1] + def _is_connected(self) -> bool: + return self._prev and self._curr and 
self._prev.end + 1 == self._curr.start + def _advance(self, times: int = 1) -> None: self._index += times self._curr = seq_get(self._tokens, self._index) @@ -1404,23 +1417,8 @@ class Parser(metaclass=_Parser): if self._match_texts(self.CLONE_KEYWORDS): copy = self._prev.text.lower() == "copy" - clone = self._parse_table(schema=True) - when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() - clone_kind = ( - self._match(TokenType.L_PAREN) - and self._match_texts(self.CLONE_KINDS) - and self._prev.text.upper() - ) - clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() - self._match(TokenType.R_PAREN) clone = self.expression( - exp.Clone, - this=clone, - when=when, - kind=clone_kind, - shallow=shallow, - expression=clone_expression, - copy=copy, + exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy ) return self.expression( @@ -2471,13 +2469,7 @@ class Parser(metaclass=_Parser): pattern = None define = ( - self._parse_csv( - lambda: self.expression( - exp.Alias, - alias=self._parse_id_var(any_token=True), - this=self._match(TokenType.ALIAS) and self._parse_conjunction(), - ) - ) + self._parse_csv(self._parse_name_as_expression) if self._match_text_seq("DEFINE") else None ) @@ -3124,6 +3116,18 @@ class Parser(metaclass=_Parser): return self.expression(exp.Connect, start=start, connect=connect) + def _parse_name_as_expression(self) -> exp.Alias: + return self.expression( + exp.Alias, + alias=self._parse_id_var(any_token=True), + this=self._match(TokenType.ALIAS) and self._parse_conjunction(), + ) + + def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: + if self._match_text_seq("INTERPOLATE"): + return self._parse_wrapped_csv(self._parse_name_as_expression) + return None + def _parse_order( self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False ) -> t.Optional[exp.Expression]: @@ -3131,7 +3135,10 @@ class Parser(metaclass=_Parser): return this return self.expression( - 
exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) + exp.Order, + this=this, + expressions=self._parse_csv(self._parse_ordered), + interpolate=self._parse_interpolate(), ) def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: @@ -3161,7 +3168,21 @@ class Parser(metaclass=_Parser): ): nulls_first = True - return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) + if self._match_text_seq("WITH", "FILL"): + with_fill = self.expression( + exp.WithFill, + **{ # type: ignore + "from": self._match(TokenType.FROM) and self._parse_bitwise(), + "to": self._match_text_seq("TO") and self._parse_bitwise(), + "step": self._match_text_seq("STEP") and self._parse_bitwise(), + }, + ) + else: + with_fill = None + + return self.expression( + exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill + ) def _parse_limit( self, this: t.Optional[exp.Expression] = None, top: bool = False @@ -3253,28 +3274,40 @@ class Parser(metaclass=_Parser): return locks def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: - if not self._match_set(self.SET_OPERATIONS): - return this + while this and self._match_set(self.SET_OPERATIONS): + token_type = self._prev.token_type - token_type = self._prev.token_type + if token_type == TokenType.UNION: + operation = exp.Union + elif token_type == TokenType.EXCEPT: + operation = exp.Except + else: + operation = exp.Intersect - if token_type == TokenType.UNION: - expression = exp.Union - elif token_type == TokenType.EXCEPT: - expression = exp.Except - else: - expression = exp.Intersect + comments = self._prev.comments + distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) + by_name = self._match_text_seq("BY", "NAME") + expression = self._parse_select(nested=True, parse_set_operation=False) - return self.expression( - expression, - comments=self._prev.comments, - this=this, - 
distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), - by_name=self._match_text_seq("BY", "NAME"), - expression=self._parse_set_operations( - self._parse_select(nested=True, parse_set_operation=False) - ), - ) + this = self.expression( + operation, + comments=comments, + this=this, + distinct=distinct, + by_name=by_name, + expression=expression, + ) + + if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: + expression = this.expression + + if expression: + for arg in self.UNION_MODIFIERS: + expr = expression.args.get(arg) + if expr: + this.set(arg, expr.pop()) + + return this def _parse_expression(self) -> t.Optional[exp.Expression]: return self._parse_alias(self._parse_conjunction()) @@ -3595,7 +3628,7 @@ class Parser(metaclass=_Parser): exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span ) else: - this = self.expression(exp.Interval, unit=unit) + this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) if maybe_func and check_func: index2 = self._index @@ -4891,8 +4924,8 @@ class Parser(metaclass=_Parser): return self.expression(exp.Var, this=self._prev.text) return self._parse_placeholder() - def _advance_any(self) -> t.Optional[Token]: - if self._curr and self._curr.token_type not in self.RESERVED_TOKENS: + def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: + if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): self._advance() return self._prev return None |