summary refs log tree commit diff stats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- sqlglot/parser.py 129
1 file changed, 81 insertions, 48 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index c7e27a3..3d01a84 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -568,6 +568,7 @@ class Parser(metaclass=_Parser):
exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
exp.Table: lambda self: self._parse_table_parts(),
exp.TableAlias: lambda self: self._parse_table_alias(),
+ exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
exp.Where: lambda self: self._parse_where(),
exp.Window: lambda self: self._parse_named_window(),
exp.With: lambda self: self._parse_with(),
@@ -635,6 +636,11 @@ class Parser(metaclass=_Parser):
TokenType.HEREDOC_STRING: lambda self, token: self.expression(
exp.RawString, this=token.text
),
+ TokenType.UNICODE_STRING: lambda self, token: self.expression(
+ exp.UnicodeString,
+ this=token.text,
+ escape=self._match_text_seq("UESCAPE") and self._parse_string(),
+ ),
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
}
@@ -907,7 +913,7 @@ class Parser(metaclass=_Parser):
INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
CLONE_KEYWORDS = {"CLONE", "COPY"}
- CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
+ HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
@@ -947,6 +953,10 @@ class Parser(metaclass=_Parser):
# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False
+ # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
+ MODIFIERS_ATTACHED_TO_UNION = True
+ UNION_MODIFIERS = {"order", "limit", "offset"}
+
__slots__ = (
"error_level",
"error_message_context",
@@ -1162,6 +1172,9 @@ class Parser(metaclass=_Parser):
def _find_sql(self, start: Token, end: Token) -> str:
return self.sql[start.start : end.end + 1]
+ def _is_connected(self) -> bool:
+ return self._prev and self._curr and self._prev.end + 1 == self._curr.start
+
def _advance(self, times: int = 1) -> None:
self._index += times
self._curr = seq_get(self._tokens, self._index)
@@ -1404,23 +1417,8 @@ class Parser(metaclass=_Parser):
if self._match_texts(self.CLONE_KEYWORDS):
copy = self._prev.text.lower() == "copy"
- clone = self._parse_table(schema=True)
- when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
- clone_kind = (
- self._match(TokenType.L_PAREN)
- and self._match_texts(self.CLONE_KINDS)
- and self._prev.text.upper()
- )
- clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
- self._match(TokenType.R_PAREN)
clone = self.expression(
- exp.Clone,
- this=clone,
- when=when,
- kind=clone_kind,
- shallow=shallow,
- expression=clone_expression,
- copy=copy,
+ exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
)
return self.expression(
@@ -2471,13 +2469,7 @@ class Parser(metaclass=_Parser):
pattern = None
define = (
- self._parse_csv(
- lambda: self.expression(
- exp.Alias,
- alias=self._parse_id_var(any_token=True),
- this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
- )
- )
+ self._parse_csv(self._parse_name_as_expression)
if self._match_text_seq("DEFINE")
else None
)
@@ -3124,6 +3116,18 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Connect, start=start, connect=connect)
+ def _parse_name_as_expression(self) -> exp.Alias:
+ return self.expression(
+ exp.Alias,
+ alias=self._parse_id_var(any_token=True),
+ this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
+ )
+
+ def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
+ if self._match_text_seq("INTERPOLATE"):
+ return self._parse_wrapped_csv(self._parse_name_as_expression)
+ return None
+
def _parse_order(
self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
) -> t.Optional[exp.Expression]:
@@ -3131,7 +3135,10 @@ class Parser(metaclass=_Parser):
return this
return self.expression(
- exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
+ exp.Order,
+ this=this,
+ expressions=self._parse_csv(self._parse_ordered),
+ interpolate=self._parse_interpolate(),
)
def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
@@ -3161,7 +3168,21 @@ class Parser(metaclass=_Parser):
):
nulls_first = True
- return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
+ if self._match_text_seq("WITH", "FILL"):
+ with_fill = self.expression(
+ exp.WithFill,
+ **{ # type: ignore
+ "from": self._match(TokenType.FROM) and self._parse_bitwise(),
+ "to": self._match_text_seq("TO") and self._parse_bitwise(),
+ "step": self._match_text_seq("STEP") and self._parse_bitwise(),
+ },
+ )
+ else:
+ with_fill = None
+
+ return self.expression(
+ exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
+ )
def _parse_limit(
self, this: t.Optional[exp.Expression] = None, top: bool = False
@@ -3253,28 +3274,40 @@ class Parser(metaclass=_Parser):
return locks
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
- if not self._match_set(self.SET_OPERATIONS):
- return this
+ while this and self._match_set(self.SET_OPERATIONS):
+ token_type = self._prev.token_type
- token_type = self._prev.token_type
+ if token_type == TokenType.UNION:
+ operation = exp.Union
+ elif token_type == TokenType.EXCEPT:
+ operation = exp.Except
+ else:
+ operation = exp.Intersect
- if token_type == TokenType.UNION:
- expression = exp.Union
- elif token_type == TokenType.EXCEPT:
- expression = exp.Except
- else:
- expression = exp.Intersect
+ comments = self._prev.comments
+ distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
+ by_name = self._match_text_seq("BY", "NAME")
+ expression = self._parse_select(nested=True, parse_set_operation=False)
- return self.expression(
- expression,
- comments=self._prev.comments,
- this=this,
- distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
- by_name=self._match_text_seq("BY", "NAME"),
- expression=self._parse_set_operations(
- self._parse_select(nested=True, parse_set_operation=False)
- ),
- )
+ this = self.expression(
+ operation,
+ comments=comments,
+ this=this,
+ distinct=distinct,
+ by_name=by_name,
+ expression=expression,
+ )
+
+ if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
+ expression = this.expression
+
+ if expression:
+ for arg in self.UNION_MODIFIERS:
+ expr = expression.args.get(arg)
+ if expr:
+ this.set(arg, expr.pop())
+
+ return this
def _parse_expression(self) -> t.Optional[exp.Expression]:
return self._parse_alias(self._parse_conjunction())
@@ -3595,7 +3628,7 @@ class Parser(metaclass=_Parser):
exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
)
else:
- this = self.expression(exp.Interval, unit=unit)
+ this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
if maybe_func and check_func:
index2 = self._index
@@ -4891,8 +4924,8 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Var, this=self._prev.text)
return self._parse_placeholder()
- def _advance_any(self) -> t.Optional[Token]:
- if self._curr and self._curr.token_type not in self.RESERVED_TOKENS:
+ def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
+ if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
self._advance()
return self._prev
return None