summary refs log tree commit diff stats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--  sqlglot/parser.py  136
1 file changed, 101 insertions(+), 35 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index bdf0d2d..55ab453 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -185,6 +185,7 @@ class Parser(metaclass=_Parser):
TokenType.LOCAL,
TokenType.LOCATION,
TokenType.MATERIALIZED,
+ TokenType.MERGE,
TokenType.NATURAL,
TokenType.NEXT,
TokenType.ONLY,
@@ -211,7 +212,6 @@ class Parser(metaclass=_Parser):
TokenType.TABLE,
TokenType.TABLE_FORMAT,
TokenType.TEMPORARY,
- TokenType.TRANSIENT,
TokenType.TOP,
TokenType.TRAILING,
TokenType.TRUE,
@@ -229,6 +229,8 @@ class Parser(metaclass=_Parser):
TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.NATURAL, TokenType.APPLY}
+ UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
+
TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
FUNC_TOKENS = {
@@ -241,6 +243,7 @@ class Parser(metaclass=_Parser):
TokenType.FORMAT,
TokenType.IDENTIFIER,
TokenType.ISNULL,
+ TokenType.MERGE,
TokenType.OFFSET,
TokenType.PRIMARY_KEY,
TokenType.REPLACE,
@@ -407,6 +410,7 @@ class Parser(metaclass=_Parser):
TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
TokenType.END: lambda self: self._parse_commit_or_rollback(),
TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
+ TokenType.MERGE: lambda self: self._parse_merge(),
}
UNARY_PARSERS = {
@@ -474,6 +478,7 @@ class Parser(metaclass=_Parser):
TokenType.SORTKEY: lambda self: self._parse_sortkey(),
TokenType.LIKE: lambda self: self._parse_create_like(),
TokenType.RETURNS: lambda self: self._parse_returns(),
+ TokenType.ROW: lambda self: self._parse_row(),
TokenType.COLLATE: lambda self: self._parse_property_assignment(exp.CollateProperty),
TokenType.COMMENT: lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
TokenType.FORMAT: lambda self: self._parse_property_assignment(exp.FileFormatProperty),
@@ -495,6 +500,8 @@ class Parser(metaclass=_Parser):
TokenType.VOLATILE: lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
),
+ TokenType.WITH: lambda self: self._parse_wrapped_csv(self._parse_property),
+ TokenType.PROPERTIES: lambda self: self._parse_wrapped_csv(self._parse_property),
}
CONSTRAINT_PARSERS = {
@@ -802,7 +809,8 @@ class Parser(metaclass=_Parser):
def _parse_create(self):
replace = self._match_pair(TokenType.OR, TokenType.REPLACE)
temporary = self._match(TokenType.TEMPORARY)
- transient = self._match(TokenType.TRANSIENT)
+ transient = self._match_text_seq("TRANSIENT")
+ external = self._match_text_seq("EXTERNAL")
unique = self._match(TokenType.UNIQUE)
materialized = self._match(TokenType.MATERIALIZED)
@@ -846,6 +854,7 @@ class Parser(metaclass=_Parser):
properties=properties,
temporary=temporary,
transient=transient,
+ external=external,
replace=replace,
unique=unique,
materialized=materialized,
@@ -861,8 +870,12 @@ class Parser(metaclass=_Parser):
if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
return self._parse_sortkey(compound=True)
- if self._match_pair(TokenType.VAR, TokenType.EQ, advance=False):
- key = self._parse_var()
+ assignment = self._match_pair(
+ TokenType.VAR, TokenType.EQ, advance=False
+ ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
+
+ if assignment:
+ key = self._parse_var() or self._parse_string()
self._match(TokenType.EQ)
return self.expression(exp.Property, this=key, value=self._parse_column())
@@ -871,7 +884,10 @@ class Parser(metaclass=_Parser):
def _parse_property_assignment(self, exp_class):
self._match(TokenType.EQ)
self._match(TokenType.ALIAS)
- return self.expression(exp_class, this=self._parse_var_or_string() or self._parse_number())
+ return self.expression(
+ exp_class,
+ this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
+ )
def _parse_partitioned_by(self):
self._match(TokenType.EQ)
@@ -881,7 +897,7 @@ class Parser(metaclass=_Parser):
)
def _parse_distkey(self):
- return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_var))
+ return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
def _parse_create_like(self):
table = self._parse_table(schema=True)
@@ -898,7 +914,7 @@ class Parser(metaclass=_Parser):
def _parse_sortkey(self, compound=False):
return self.expression(
- exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_var), compound=compound
+ exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
)
def _parse_character_set(self, default=False):
@@ -929,23 +945,11 @@ class Parser(metaclass=_Parser):
properties = []
while True:
- if self._match(TokenType.WITH):
- properties.extend(self._parse_wrapped_csv(self._parse_property))
- elif self._match(TokenType.PROPERTIES):
- properties.extend(
- self._parse_wrapped_csv(
- lambda: self.expression(
- exp.Property,
- this=self._parse_string(),
- value=self._match(TokenType.EQ) and self._parse_string(),
- )
- )
- )
- else:
- identified_property = self._parse_property()
- if not identified_property:
- break
- properties.append(identified_property)
+ identified_property = self._parse_property()
+ if not identified_property:
+ break
+ for p in ensure_collection(identified_property):
+ properties.append(p)
if properties:
return self.expression(exp.Properties, expressions=properties)
@@ -963,7 +967,7 @@ class Parser(metaclass=_Parser):
exp.Directory,
this=self._parse_var_or_string(),
local=local,
- row_format=self._parse_row_format(),
+ row_format=self._parse_row_format(match_row=True),
)
else:
self._match(TokenType.INTO)
@@ -978,10 +982,18 @@ class Parser(metaclass=_Parser):
overwrite=overwrite,
)
- def _parse_row_format(self):
- if not self._match_pair(TokenType.ROW, TokenType.FORMAT):
+ def _parse_row(self):
+ if not self._match(TokenType.FORMAT):
+ return None
+ return self._parse_row_format()
+
+ def _parse_row_format(self, match_row=False):
+ if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
return None
+ if self._match_text_seq("SERDE"):
+ return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
+
self._match_text_seq("DELIMITED")
kwargs = {}
@@ -998,7 +1010,7 @@ class Parser(metaclass=_Parser):
kwargs["lines"] = self._parse_string()
if self._match_text_seq("NULL", "DEFINED", "AS"):
kwargs["null"] = self._parse_string()
- return self.expression(exp.RowFormat, **kwargs)
+ return self.expression(exp.RowFormatDelimitedProperty, **kwargs)
def _parse_load_data(self):
local = self._match(TokenType.LOCAL)
@@ -1032,7 +1044,7 @@ class Parser(metaclass=_Parser):
return self.expression(
exp.Update,
**{
- "this": self._parse_table(schema=True),
+ "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
"expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
"from": self._parse_from(),
"where": self._parse_where(),
@@ -1183,9 +1195,11 @@ class Parser(metaclass=_Parser):
alias=alias,
)
- def _parse_table_alias(self):
+ def _parse_table_alias(self, alias_tokens=None):
any_token = self._match(TokenType.ALIAS)
- alias = self._parse_id_var(any_token=any_token, tokens=self.TABLE_ALIAS_TOKENS)
+ alias = self._parse_id_var(
+ any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
+ )
columns = None
if self._match(TokenType.L_PAREN):
@@ -1337,7 +1351,7 @@ class Parser(metaclass=_Parser):
columns=self._parse_expression(),
)
- def _parse_table(self, schema=False):
+ def _parse_table(self, schema=False, alias_tokens=None):
lateral = self._parse_lateral()
if lateral:
@@ -1372,7 +1386,7 @@ class Parser(metaclass=_Parser):
table = self._parse_id_var()
if not table:
- self.raise_error("Expected table name")
+ self.raise_error(f"Expected table name but got {self._curr}")
this = self.expression(
exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
@@ -1384,7 +1398,7 @@ class Parser(metaclass=_Parser):
if self.alias_post_tablesample:
table_sample = self._parse_table_sample()
- alias = self._parse_table_alias()
+ alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
if alias:
this.set("alias", alias)
@@ -2092,10 +2106,14 @@ class Parser(metaclass=_Parser):
kind = self.expression(exp.CheckColumnConstraint, this=constraint)
elif self._match(TokenType.COLLATE):
kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
+ elif self._match(TokenType.ENCODE):
+ kind = self.expression(exp.EncodeColumnConstraint, this=self._parse_var())
elif self._match(TokenType.DEFAULT):
kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_conjunction())
elif self._match_pair(TokenType.NOT, TokenType.NULL):
kind = exp.NotNullColumnConstraint()
+ elif self._match(TokenType.NULL):
+ kind = exp.NotNullColumnConstraint(allow_null=True)
elif self._match(TokenType.SCHEMA_COMMENT):
kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string())
elif self._match(TokenType.PRIMARY_KEY):
@@ -2234,7 +2252,7 @@ class Parser(metaclass=_Parser):
return self._parse_window(this)
def _parse_extract(self):
- this = self._parse_var() or self._parse_type()
+ this = self._parse_function() or self._parse_var() or self._parse_type()
if self._match(TokenType.FROM):
return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
@@ -2635,6 +2653,54 @@ class Parser(metaclass=_Parser):
parser = self._find_parser(self.SET_PARSERS, self._set_trie)
return parser(self) if parser else self._default_parse_set_item()
+ def _parse_merge(self):
+ self._match(TokenType.INTO)
+ target = self._parse_table(schema=True)
+
+ self._match(TokenType.USING)
+ using = self._parse_table()
+
+ self._match(TokenType.ON)
+ on = self._parse_conjunction()
+
+ whens = []
+ while self._match(TokenType.WHEN):
+ this = self._parse_conjunction()
+ self._match(TokenType.THEN)
+
+ if self._match(TokenType.INSERT):
+ _this = self._parse_star()
+ if _this:
+ then = self.expression(exp.Insert, this=_this)
+ else:
+ then = self.expression(
+ exp.Insert,
+ this=self._parse_value(),
+ expression=self._match(TokenType.VALUES) and self._parse_value(),
+ )
+ elif self._match(TokenType.UPDATE):
+ expressions = self._parse_star()
+ if expressions:
+ then = self.expression(exp.Update, expressions=expressions)
+ else:
+ then = self.expression(
+ exp.Update,
+ expressions=self._match(TokenType.SET)
+ and self._parse_csv(self._parse_equality),
+ )
+ elif self._match(TokenType.DELETE):
+ then = self.expression(exp.Var, this=self._prev.text)
+
+ whens.append(self.expression(exp.When, this=this, then=then))
+
+ return self.expression(
+ exp.Merge,
+ this=target,
+ using=using,
+ on=on,
+ expressions=whens,
+ )
+
def _parse_set(self):
return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))