summaryrefslogtreecommitdiffstats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--sqlglot/parser.py243
1 files changed, 164 insertions, 79 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 6ad6391..72bad92 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -99,7 +99,8 @@ class Parser:
TokenType.SMALLMONEY,
TokenType.ROWVERSION,
TokenType.IMAGE,
- TokenType.SQL_VARIANT,
+ TokenType.VARIANT,
+ TokenType.OBJECT,
*NESTED_TYPE_TOKENS,
}
@@ -131,7 +132,6 @@ class Parser:
TokenType.FALSE,
TokenType.FIRST,
TokenType.FOLLOWING,
- TokenType.FOR,
TokenType.FORMAT,
TokenType.FUNCTION,
TokenType.GENERATED,
@@ -141,20 +141,26 @@ class Parser:
TokenType.ISNULL,
TokenType.INTERVAL,
TokenType.LAZY,
+ TokenType.LANGUAGE,
TokenType.LEADING,
TokenType.LOCATION,
+ TokenType.MATERIALIZED,
TokenType.NATURAL,
TokenType.NEXT,
TokenType.ONLY,
TokenType.OPTIMIZE,
TokenType.OPTIONS,
TokenType.ORDINALITY,
+ TokenType.PARTITIONED_BY,
TokenType.PERCENT,
+ TokenType.PIVOT,
TokenType.PRECEDING,
TokenType.RANGE,
TokenType.REFERENCES,
+ TokenType.RETURNS,
TokenType.ROWS,
TokenType.SCHEMA_COMMENT,
+ TokenType.SEED,
TokenType.SET,
TokenType.SHOW,
TokenType.STORED,
@@ -167,6 +173,7 @@ class Parser:
TokenType.TRUE,
TokenType.UNBOUNDED,
TokenType.UNIQUE,
+ TokenType.UNPIVOT,
TokenType.PROPERTIES,
*SUBQUERY_PREDICATES,
*TYPE_TOKENS,
@@ -303,6 +310,8 @@ class Parser:
exp.Condition: lambda self: self._parse_conjunction(),
exp.Expression: lambda self: self._parse_statement(),
exp.Properties: lambda self: self._parse_properties(),
+ exp.Where: lambda self: self._parse_where(),
+ exp.Ordered: lambda self: self._parse_ordered(),
"JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
}
@@ -355,23 +364,21 @@ class Parser:
PROPERTY_PARSERS = {
TokenType.AUTO_INCREMENT: lambda self: self._parse_auto_increment(),
TokenType.CHARACTER_SET: lambda self: self._parse_character_set(),
- TokenType.COLLATE: lambda self: self._parse_collate(),
- TokenType.ENGINE: lambda self: self._parse_engine(),
- TokenType.FORMAT: lambda self: self._parse_format(),
TokenType.LOCATION: lambda self: self.expression(
exp.LocationProperty,
this=exp.Literal.string("LOCATION"),
value=self._parse_string(),
),
- TokenType.PARTITIONED_BY: lambda self: self.expression(
- exp.PartitionedByProperty,
- this=exp.Literal.string("PARTITIONED_BY"),
- value=self._parse_schema(),
- ),
+ TokenType.PARTITIONED_BY: lambda self: self._parse_partitioned_by(),
TokenType.SCHEMA_COMMENT: lambda self: self._parse_schema_comment(),
TokenType.STORED: lambda self: self._parse_stored(),
- TokenType.TABLE_FORMAT: lambda self: self._parse_table_format(),
- TokenType.USING: lambda self: self._parse_table_format(),
+ TokenType.RETURNS: lambda self: self._parse_returns(),
+ TokenType.COLLATE: lambda self: self._parse_property_assignment(exp.CollateProperty),
+ TokenType.COMMENT: lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
+ TokenType.FORMAT: lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+ TokenType.TABLE_FORMAT: lambda self: self._parse_property_assignment(exp.TableFormatProperty),
+ TokenType.USING: lambda self: self._parse_property_assignment(exp.TableFormatProperty),
+ TokenType.LANGUAGE: lambda self: self._parse_property_assignment(exp.LanguageProperty),
}
CONSTRAINT_PARSERS = {
@@ -388,6 +395,7 @@ class Parser:
FUNCTION_PARSERS = {
"CONVERT": lambda self: self._parse_convert(),
"EXTRACT": lambda self: self._parse_extract(),
+ "POSITION": lambda self: self._parse_position(),
"SUBSTRING": lambda self: self._parse_substring(),
"TRIM": lambda self: self._parse_trim(),
"CAST": lambda self: self._parse_cast(self.STRICT_CAST),
@@ -628,6 +636,10 @@ class Parser:
replace = self._match(TokenType.OR) and self._match(TokenType.REPLACE)
temporary = self._match(TokenType.TEMPORARY)
unique = self._match(TokenType.UNIQUE)
+ materialized = self._match(TokenType.MATERIALIZED)
+
+ if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
+ self._match(TokenType.TABLE)
create_token = self._match_set(self.CREATABLES) and self._prev
@@ -640,14 +652,15 @@ class Parser:
properties = None
if create_token.token_type == TokenType.FUNCTION:
- this = self._parse_var()
+ this = self._parse_user_defined_function()
+ properties = self._parse_properties()
if self._match(TokenType.ALIAS):
- expression = self._parse_string()
+ expression = self._parse_select_or_expression()
elif create_token.token_type == TokenType.INDEX:
this = self._parse_index()
elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
this = self._parse_table(schema=True)
- properties = self._parse_properties(this if isinstance(this, exp.Schema) else None)
+ properties = self._parse_properties()
if self._match(TokenType.ALIAS):
expression = self._parse_select(nested=True)
@@ -661,9 +674,10 @@ class Parser:
temporary=temporary,
replace=replace,
unique=unique,
+ materialized=materialized,
)
- def _parse_property(self, schema):
+ def _parse_property(self):
if self._match_set(self.PROPERTY_PARSERS):
return self.PROPERTY_PARSERS[self._prev.token_type](self)
if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
@@ -673,54 +687,34 @@ class Parser:
key = self._parse_var().this
self._match(TokenType.EQ)
- if key.upper() == "PARTITIONED_BY":
- expression = exp.PartitionedByProperty
- value = self._parse_schema() or self._parse_bracket(self._parse_field())
-
- if schema and not isinstance(value, exp.Schema):
- columns = {v.name.upper() for v in value.expressions}
- partitions = [
- expression for expression in schema.expressions if expression.this.name.upper() in columns
- ]
- schema.set(
- "expressions",
- [e for e in schema.expressions if e not in partitions],
- )
- value = self.expression(exp.Schema, expressions=partitions)
- else:
- value = self._parse_column()
- expression = exp.AnonymousProperty
-
return self.expression(
- expression,
+ exp.AnonymousProperty,
this=exp.Literal.string(key),
- value=value,
+ value=self._parse_column(),
)
+
return None
- def _parse_stored(self):
- self._match(TokenType.ALIAS)
+ def _parse_property_assignment(self, exp_class):
+ prop = self._prev.text
self._match(TokenType.EQ)
- return self.expression(
- exp.FileFormatProperty,
- this=exp.Literal.string("FORMAT"),
- value=exp.Literal.string(self._parse_var().name),
- )
+ return self.expression(exp_class, this=prop, value=self._parse_var_or_string())
- def _parse_format(self):
+ def _parse_partitioned_by(self):
self._match(TokenType.EQ)
return self.expression(
- exp.FileFormatProperty,
- this=exp.Literal.string("FORMAT"),
- value=self._parse_string() or self._parse_var(),
+ exp.PartitionedByProperty,
+ this=exp.Literal.string("PARTITIONED_BY"),
+ value=self._parse_schema() or self._parse_bracket(self._parse_field()),
)
- def _parse_engine(self):
+ def _parse_stored(self):
+ self._match(TokenType.ALIAS)
self._match(TokenType.EQ)
return self.expression(
- exp.EngineProperty,
- this=exp.Literal.string("ENGINE"),
- value=self._parse_var_or_string(),
+ exp.FileFormatProperty,
+ this=exp.Literal.string("FORMAT"),
+ value=exp.Literal.string(self._parse_var().name),
)
def _parse_auto_increment(self):
@@ -731,14 +725,6 @@ class Parser:
value=self._parse_var() or self._parse_number(),
)
- def _parse_collate(self):
- self._match(TokenType.EQ)
- return self.expression(
- exp.CollateProperty,
- this=exp.Literal.string("COLLATE"),
- value=self._parse_var_or_string(),
- )
-
def _parse_schema_comment(self):
self._match(TokenType.EQ)
return self.expression(
@@ -756,26 +742,34 @@ class Parser:
default=default,
)
- def _parse_table_format(self):
- self._match(TokenType.EQ)
+ def _parse_returns(self):
+ is_table = self._match(TokenType.TABLE)
+ if is_table:
+ if self._match(TokenType.LT):
+ value = self.expression(
+ exp.Schema, this="TABLE", expressions=self._parse_csv(self._parse_struct_kwargs)
+ )
+ if not self._match(TokenType.GT):
+ self.raise_error("Expecting >")
+ else:
+ value = self._parse_schema("TABLE")
+ else:
+ value = self._parse_types()
+
return self.expression(
- exp.TableFormatProperty,
- this=exp.Literal.string("TABLE_FORMAT"),
- value=self._parse_var_or_string(),
+ exp.ReturnsProperty,
+ this=exp.Literal.string("RETURNS"),
+ value=value,
+ is_table=is_table,
)
- def _parse_properties(self, schema=None):
- """
- Schema is included since if the table schema is defined and we later get a partition by expression
- then we will define those columns in the partition by section and not in with the rest of the
- columns
- """
+ def _parse_properties(self):
properties = []
while True:
if self._match(TokenType.WITH):
self._match_l_paren()
- properties.extend(self._parse_csv(lambda: self._parse_property(schema)))
+ properties.extend(self._parse_csv(lambda: self._parse_property()))
self._match_r_paren()
elif self._match(TokenType.PROPERTIES):
self._match_l_paren()
@@ -790,7 +784,7 @@ class Parser:
)
self._match_r_paren()
else:
- identified_property = self._parse_property(schema)
+ identified_property = self._parse_property()
if not identified_property:
break
properties.append(identified_property)
@@ -1003,7 +997,7 @@ class Parser:
)
def _parse_subquery(self, this):
- return self.expression(exp.Subquery, this=this, alias=self._parse_table_alias())
+ return self.expression(exp.Subquery, this=this, pivots=self._parse_pivots(), alias=self._parse_table_alias())
def _parse_query_modifiers(self, this):
if not isinstance(this, self.MODIFIABLES):
@@ -1134,14 +1128,18 @@ class Parser:
table = (not schema and self._parse_function()) or self._parse_id_var(False)
while self._match(TokenType.DOT):
- catalog = db
- db = table
- table = self._parse_id_var()
+ if catalog:
+ # This allows nesting the table in arbitrarily many dot expressions if needed
+ table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
+ else:
+ catalog = db
+ db = table
+ table = self._parse_id_var()
if not table:
self.raise_error("Expected table name")
- this = self.expression(exp.Table, this=table, db=db, catalog=catalog)
+ this = self.expression(exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots())
if schema:
return self._parse_schema(this=this)
@@ -1199,6 +1197,7 @@ class Parser:
percent = None
rows = None
size = None
+ seed = None
self._match_l_paren()
@@ -1220,6 +1219,11 @@ class Parser:
self._match_r_paren()
+ if self._match(TokenType.SEED):
+ self._match_l_paren()
+ seed = self._parse_number()
+ self._match_r_paren()
+
return self.expression(
exp.TableSample,
method=method,
@@ -1229,6 +1233,51 @@ class Parser:
percent=percent,
rows=rows,
size=size,
+ seed=seed,
+ )
+
+ def _parse_pivots(self):
+ return list(iter(self._parse_pivot, None))
+
+ def _parse_pivot(self):
+ index = self._index
+
+ if self._match(TokenType.PIVOT):
+ unpivot = False
+ elif self._match(TokenType.UNPIVOT):
+ unpivot = True
+ else:
+ return None
+
+ expressions = []
+ field = None
+
+ if not self._match(TokenType.L_PAREN):
+ self._retreat(index)
+ return None
+
+ if unpivot:
+ expressions = self._parse_csv(self._parse_column)
+ else:
+ expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
+
+ if not self._match(TokenType.FOR):
+ self.raise_error("Expecting FOR")
+
+ value = self._parse_column()
+
+ if not self._match(TokenType.IN):
+ self.raise_error("Expecting IN")
+
+ field = self._parse_in(value)
+
+ self._match_r_paren()
+
+ return self.expression(
+ exp.Pivot,
+ expressions=expressions,
+ field=field,
+ unpivot=unpivot,
)
def _parse_where(self):
@@ -1384,7 +1433,7 @@ class Parser:
this = self.expression(exp.In, this=this, unnest=unnest)
else:
self._match_l_paren()
- expressions = self._parse_csv(lambda: self._parse_select() or self._parse_expression())
+ expressions = self._parse_csv(self._parse_select_or_expression)
if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
this = self.expression(exp.In, this=this, query=expressions[0])
@@ -1577,6 +1626,9 @@ class Parser:
if self._match_set(self.PRIMARY_PARSERS):
return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
+ if self._match_pair(TokenType.DOT, TokenType.NUMBER):
+ return exp.Literal.number(f"0.{self._prev.text}")
+
if self._match(TokenType.L_PAREN):
query = self._parse_select()
@@ -1647,6 +1699,23 @@ class Parser:
self._match_r_paren()
return self._parse_window(this)
+ def _parse_user_defined_function(self):
+ this = self._parse_var()
+ if not self._match(TokenType.L_PAREN):
+ return this
+ expressions = self._parse_csv(self._parse_udf_kwarg)
+ self._match_r_paren()
+ return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)
+
+ def _parse_udf_kwarg(self):
+ this = self._parse_id_var()
+ kind = self._parse_types()
+
+ if not kind:
+ return this
+
+ return self.expression(exp.UserDefinedFunctionKwarg, this=this, kind=kind)
+
def _parse_lambda(self):
index = self._index
@@ -1672,9 +1741,10 @@ class Parser:
return self._parse_alias(self._parse_limit(self._parse_order(this)))
+ conjunction = self._parse_conjunction().transform(self._replace_lambda, {node.name for node in expressions})
return self.expression(
exp.Lambda,
- this=self._parse_conjunction(),
+ this=conjunction,
expressions=expressions,
)
@@ -1896,6 +1966,12 @@ class Parser:
to = None
return self.expression(exp.Cast, this=this, to=to)
+ def _parse_position(self):
+ substr = self._parse_bitwise()
+ if self._match(TokenType.IN):
+ string = self._parse_bitwise()
+ return self.expression(exp.StrPosition, this=string, substr=substr)
+
def _parse_substring(self):
# Postgres supports the form: substring(string [from int] [for int])
# https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
@@ -2155,6 +2231,9 @@ class Parser:
self._match_r_paren()
return expressions
+ def _parse_select_or_expression(self):
+ return self._parse_select() or self._parse_expression()
+
def _match(self, token_type):
if not self._curr:
return None
@@ -2208,3 +2287,9 @@ class Parser:
elif isinstance(this, exp.Identifier):
this = self.expression(exp.Var, this=this.name)
return this
+
+ def _replace_lambda(self, node, lambda_variables):
+ if isinstance(node, exp.Column):
+ if node.name in lambda_variables:
+ return node.this
+ return node