diff options
Diffstat (limited to '')
-rw-r--r-- | sqlglot/parser.py | 129 |
1 file changed, 81 insertions(+), 48 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index c7e27a3..3d01a84 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -568,6 +568,7 @@ class Parser(metaclass=_Parser): exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), exp.Table: lambda self: self._parse_table_parts(), exp.TableAlias: lambda self: self._parse_table_alias(), + exp.When: lambda self: seq_get(self._parse_when_matched(), 0), exp.Where: lambda self: self._parse_where(), exp.Window: lambda self: self._parse_named_window(), exp.With: lambda self: self._parse_with(), @@ -635,6 +636,11 @@ class Parser(metaclass=_Parser): TokenType.HEREDOC_STRING: lambda self, token: self.expression( exp.RawString, this=token.text ), + TokenType.UNICODE_STRING: lambda self, token: self.expression( + exp.UnicodeString, + this=token.text, + escape=self._match_text_seq("UESCAPE") and self._parse_string(), + ), TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), } @@ -907,7 +913,7 @@ class Parser(metaclass=_Parser): INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} CLONE_KEYWORDS = {"CLONE", "COPY"} - CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} + HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} @@ -947,6 +953,10 @@ class Parser(metaclass=_Parser): # Whether the TRIM function expects the characters to trim as its first argument TRIM_PATTERN_FIRST = False + # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) + MODIFIERS_ATTACHED_TO_UNION = True + UNION_MODIFIERS = {"order", "limit", "offset"} + __slots__ = ( "error_level", "error_message_context", @@ -1162,6 +1172,9 @@ class Parser(metaclass=_Parser): def _find_sql(self, start: Token, end: Token) -> str: return self.sql[start.start : end.end + 1] + def _is_connected(self) -> bool: + return self._prev and self._curr and 
self._prev.end + 1 == self._curr.start + def _advance(self, times: int = 1) -> None: self._index += times self._curr = seq_get(self._tokens, self._index) @@ -1404,23 +1417,8 @@ class Parser(metaclass=_Parser): if self._match_texts(self.CLONE_KEYWORDS): copy = self._prev.text.lower() == "copy" - clone = self._parse_table(schema=True) - when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() - clone_kind = ( - self._match(TokenType.L_PAREN) - and self._match_texts(self.CLONE_KINDS) - and self._prev.text.upper() - ) - clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() - self._match(TokenType.R_PAREN) clone = self.expression( - exp.Clone, - this=clone, - when=when, - kind=clone_kind, - shallow=shallow, - expression=clone_expression, - copy=copy, + exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy ) return self.expression( @@ -2471,13 +2469,7 @@ class Parser(metaclass=_Parser): pattern = None define = ( - self._parse_csv( - lambda: self.expression( - exp.Alias, - alias=self._parse_id_var(any_token=True), - this=self._match(TokenType.ALIAS) and self._parse_conjunction(), - ) - ) + self._parse_csv(self._parse_name_as_expression) if self._match_text_seq("DEFINE") else None ) @@ -3124,6 +3116,18 @@ class Parser(metaclass=_Parser): return self.expression(exp.Connect, start=start, connect=connect) + def _parse_name_as_expression(self) -> exp.Alias: + return self.expression( + exp.Alias, + alias=self._parse_id_var(any_token=True), + this=self._match(TokenType.ALIAS) and self._parse_conjunction(), + ) + + def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: + if self._match_text_seq("INTERPOLATE"): + return self._parse_wrapped_csv(self._parse_name_as_expression) + return None + def _parse_order( self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False ) -> t.Optional[exp.Expression]: @@ -3131,7 +3135,10 @@ class Parser(metaclass=_Parser): return this return self.expression( - 
exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) + exp.Order, + this=this, + expressions=self._parse_csv(self._parse_ordered), + interpolate=self._parse_interpolate(), ) def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: @@ -3161,7 +3168,21 @@ class Parser(metaclass=_Parser): ): nulls_first = True - return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) + if self._match_text_seq("WITH", "FILL"): + with_fill = self.expression( + exp.WithFill, + **{ # type: ignore + "from": self._match(TokenType.FROM) and self._parse_bitwise(), + "to": self._match_text_seq("TO") and self._parse_bitwise(), + "step": self._match_text_seq("STEP") and self._parse_bitwise(), + }, + ) + else: + with_fill = None + + return self.expression( + exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill + ) def _parse_limit( self, this: t.Optional[exp.Expression] = None, top: bool = False @@ -3253,28 +3274,40 @@ class Parser(metaclass=_Parser): return locks def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: - if not self._match_set(self.SET_OPERATIONS): - return this + while this and self._match_set(self.SET_OPERATIONS): + token_type = self._prev.token_type - token_type = self._prev.token_type + if token_type == TokenType.UNION: + operation = exp.Union + elif token_type == TokenType.EXCEPT: + operation = exp.Except + else: + operation = exp.Intersect - if token_type == TokenType.UNION: - expression = exp.Union - elif token_type == TokenType.EXCEPT: - expression = exp.Except - else: - expression = exp.Intersect + comments = self._prev.comments + distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) + by_name = self._match_text_seq("BY", "NAME") + expression = self._parse_select(nested=True, parse_set_operation=False) - return self.expression( - expression, - comments=self._prev.comments, - this=this, - 
distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), - by_name=self._match_text_seq("BY", "NAME"), - expression=self._parse_set_operations( - self._parse_select(nested=True, parse_set_operation=False) - ), - ) + this = self.expression( + operation, + comments=comments, + this=this, + distinct=distinct, + by_name=by_name, + expression=expression, + ) + + if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: + expression = this.expression + + if expression: + for arg in self.UNION_MODIFIERS: + expr = expression.args.get(arg) + if expr: + this.set(arg, expr.pop()) + + return this def _parse_expression(self) -> t.Optional[exp.Expression]: return self._parse_alias(self._parse_conjunction()) @@ -3595,7 +3628,7 @@ class Parser(metaclass=_Parser): exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span ) else: - this = self.expression(exp.Interval, unit=unit) + this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) if maybe_func and check_func: index2 = self._index @@ -4891,8 +4924,8 @@ class Parser(metaclass=_Parser): return self.expression(exp.Var, this=self._prev.text) return self._parse_placeholder() - def _advance_any(self) -> t.Optional[Token]: - if self._curr and self._curr.token_type not in self.RESERVED_TOKENS: + def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: + if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): self._advance() return self._prev return None |