summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- sqlglot/parser.py 110
1 file changed, 69 insertions, 41 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index e16a88e..e5bd4ae 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -585,6 +585,7 @@ class Parser(metaclass=_Parser):
"CHARACTER SET": lambda self: self._parse_character_set(),
"CHECKSUM": lambda self: self._parse_checksum(),
"CLUSTER BY": lambda self: self._parse_cluster(),
+ "CLUSTERED": lambda self: self._parse_clustered_by(),
"COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
"COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
"COPY": lambda self: self._parse_copy_property(),
@@ -794,8 +795,6 @@ class Parser(metaclass=_Parser):
# A NULL arg in CONCAT yields NULL by default
CONCAT_NULL_OUTPUTS_STRING = False
- CONVERT_TYPE_FIRST = False
-
PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False
@@ -1426,9 +1425,34 @@ class Parser(metaclass=_Parser):
return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
- def _parse_cluster(self) -> t.Optional[exp.Cluster]:
+ def _parse_cluster(self) -> exp.Cluster:
return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))
+ def _parse_clustered_by(self) -> exp.ClusteredByProperty:
+ self._match_text_seq("BY")
+
+ self._match_l_paren()
+ expressions = self._parse_csv(self._parse_column)
+ self._match_r_paren()
+
+ if self._match_text_seq("SORTED", "BY"):
+ self._match_l_paren()
+ sorted_by = self._parse_csv(self._parse_ordered)
+ self._match_r_paren()
+ else:
+ sorted_by = None
+
+ self._match(TokenType.INTO)
+ buckets = self._parse_number()
+ self._match_text_seq("BUCKETS")
+
+ return self.expression(
+ exp.ClusteredByProperty,
+ expressions=expressions,
+ sorted_by=sorted_by,
+ buckets=buckets,
+ )
+
def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
if not self._match_text_seq("GRANTS"):
self._retreat(self._index - 1)
@@ -2863,7 +2887,11 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.INTERVAL):
return None
- this = self._parse_primary() or self._parse_term()
+ if self._match(TokenType.STRING, advance=False):
+ this = self._parse_primary()
+ else:
+ this = self._parse_term()
+
unit = self._parse_function() or self._parse_var()
# Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
@@ -3661,6 +3689,7 @@ class Parser(metaclass=_Parser):
else:
self.raise_error("Expected AS after CAST")
+ fmt = None
to = self._parse_types()
if not to:
@@ -3668,22 +3697,23 @@ class Parser(metaclass=_Parser):
elif to.this == exp.DataType.Type.CHAR:
if self._match(TokenType.CHARACTER_SET):
to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
- elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
- fmt = self._parse_string()
+ elif self._match(TokenType.FORMAT):
+ fmt = self._parse_at_time_zone(self._parse_string())
- return self.expression(
- exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
- this=this,
- format=exp.Literal.string(
- format_time(
- fmt.this if fmt else "",
- self.FORMAT_MAPPING or self.TIME_MAPPING,
- self.FORMAT_TRIE or self.TIME_TRIE,
- )
- ),
- )
+ if to.this in exp.DataType.TEMPORAL_TYPES:
+ return self.expression(
+ exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
+ this=this,
+ format=exp.Literal.string(
+ format_time(
+ fmt.this if fmt else "",
+ self.FORMAT_MAPPING or self.TIME_MAPPING,
+ self.FORMAT_TRIE or self.TIME_TRIE,
+ )
+ ),
+ )
- return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
+ return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
def _parse_concat(self) -> t.Optional[exp.Expression]:
args = self._parse_csv(self._parse_conjunction)
@@ -3704,20 +3734,23 @@ class Parser(metaclass=_Parser):
)
def _parse_string_agg(self) -> exp.Expression:
- expression: t.Optional[exp.Expression]
-
if self._match(TokenType.DISTINCT):
- args = self._parse_csv(self._parse_conjunction)
- expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
+ args: t.List[t.Optional[exp.Expression]] = [
+ self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
+ ]
+ if self._match(TokenType.COMMA):
+ args.extend(self._parse_csv(self._parse_conjunction))
else:
args = self._parse_csv(self._parse_conjunction)
- expression = seq_get(args, 0)
index = self._index
if not self._match(TokenType.R_PAREN):
# postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
- order = self._parse_order(this=expression)
- return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
+ return self.expression(
+ exp.GroupConcat,
+ this=seq_get(args, 0),
+ separator=self._parse_order(this=seq_get(args, 1)),
+ )
# Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
# This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
@@ -3727,24 +3760,21 @@ class Parser(metaclass=_Parser):
return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller)
- order = self._parse_order(this=expression)
+ order = self._parse_order(this=seq_get(args, 0))
return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
- to: t.Optional[exp.Expression]
this = self._parse_bitwise()
if self._match(TokenType.USING):
- to = self.expression(exp.CharacterSet, this=self._parse_var())
+ to: t.Optional[exp.Expression] = self.expression(
+ exp.CharacterSet, this=self._parse_var()
+ )
elif self._match(TokenType.COMMA):
- to = self._parse_bitwise()
+ to = self._parse_types()
else:
to = None
- # Swap the argument order if needed to produce the correct AST
- if self.CONVERT_TYPE_FIRST:
- this, to = to, this
-
return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
@@ -4394,8 +4424,8 @@ class Parser(metaclass=_Parser):
if self._next:
self._advance()
- parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
+ parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
if parser:
actions = ensure_list(parser(self))
@@ -4516,9 +4546,11 @@ class Parser(metaclass=_Parser):
parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
return parser(self) if parser else self._parse_set_item_assignment(kind=None)
- def _parse_set(self) -> exp.Set | exp.Command:
+ def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
index = self._index
- set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
+ set_ = self.expression(
+ exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
+ )
if self._curr:
self._retreat(index)
@@ -4683,12 +4715,8 @@ class Parser(metaclass=_Parser):
exp.replace_children(this, self._replace_columns_with_dots)
table = this.args.get("table")
this = (
- self.expression(exp.Dot, this=table, expression=this.this)
- if table
- else self.expression(exp.Var, this=this.name)
+ self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
)
- elif isinstance(this, exp.Identifier):
- this = self.expression(exp.Var, this=this.name)
return this