diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-12-12 15:42:33 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-12-12 15:42:33 +0000 |
commit | 579e404567dfff42e64325a8c79f03ac627ea341 (patch) | |
tree | 12d101aa5d1b70a69132e5cbd3307741c00d097f /sqlglot/parser.py | |
parent | Adding upstream version 10.1.3. (diff) | |
download | sqlglot-579e404567dfff42e64325a8c79f03ac627ea341.tar.xz sqlglot-579e404567dfff42e64325a8c79f03ac627ea341.zip |
Adding upstream version 10.2.6. (ref: upstream/10.2.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 136 |
1 files changed, 101 insertions, 35 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index bdf0d2d..55ab453 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -185,6 +185,7 @@ class Parser(metaclass=_Parser): TokenType.LOCAL, TokenType.LOCATION, TokenType.MATERIALIZED, + TokenType.MERGE, TokenType.NATURAL, TokenType.NEXT, TokenType.ONLY, @@ -211,7 +212,6 @@ class Parser(metaclass=_Parser): TokenType.TABLE, TokenType.TABLE_FORMAT, TokenType.TEMPORARY, - TokenType.TRANSIENT, TokenType.TOP, TokenType.TRAILING, TokenType.TRUE, @@ -229,6 +229,8 @@ class Parser(metaclass=_Parser): TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.NATURAL, TokenType.APPLY} + UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} + TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} FUNC_TOKENS = { @@ -241,6 +243,7 @@ class Parser(metaclass=_Parser): TokenType.FORMAT, TokenType.IDENTIFIER, TokenType.ISNULL, + TokenType.MERGE, TokenType.OFFSET, TokenType.PRIMARY_KEY, TokenType.REPLACE, @@ -407,6 +410,7 @@ class Parser(metaclass=_Parser): TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), TokenType.END: lambda self: self._parse_commit_or_rollback(), TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), + TokenType.MERGE: lambda self: self._parse_merge(), } UNARY_PARSERS = { @@ -474,6 +478,7 @@ class Parser(metaclass=_Parser): TokenType.SORTKEY: lambda self: self._parse_sortkey(), TokenType.LIKE: lambda self: self._parse_create_like(), TokenType.RETURNS: lambda self: self._parse_returns(), + TokenType.ROW: lambda self: self._parse_row(), TokenType.COLLATE: lambda self: self._parse_property_assignment(exp.CollateProperty), TokenType.COMMENT: lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), TokenType.FORMAT: lambda self: self._parse_property_assignment(exp.FileFormatProperty), @@ -495,6 +500,8 @@ class Parser(metaclass=_Parser): TokenType.VOLATILE: lambda self: self.expression( exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") ), + 
TokenType.WITH: lambda self: self._parse_wrapped_csv(self._parse_property), + TokenType.PROPERTIES: lambda self: self._parse_wrapped_csv(self._parse_property), } CONSTRAINT_PARSERS = { @@ -802,7 +809,8 @@ class Parser(metaclass=_Parser): def _parse_create(self): replace = self._match_pair(TokenType.OR, TokenType.REPLACE) temporary = self._match(TokenType.TEMPORARY) - transient = self._match(TokenType.TRANSIENT) + transient = self._match_text_seq("TRANSIENT") + external = self._match_text_seq("EXTERNAL") unique = self._match(TokenType.UNIQUE) materialized = self._match(TokenType.MATERIALIZED) @@ -846,6 +854,7 @@ class Parser(metaclass=_Parser): properties=properties, temporary=temporary, transient=transient, + external=external, replace=replace, unique=unique, materialized=materialized, @@ -861,8 +870,12 @@ class Parser(metaclass=_Parser): if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): return self._parse_sortkey(compound=True) - if self._match_pair(TokenType.VAR, TokenType.EQ, advance=False): - key = self._parse_var() + assignment = self._match_pair( + TokenType.VAR, TokenType.EQ, advance=False + ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) + + if assignment: + key = self._parse_var() or self._parse_string() self._match(TokenType.EQ) return self.expression(exp.Property, this=key, value=self._parse_column()) @@ -871,7 +884,10 @@ class Parser(metaclass=_Parser): def _parse_property_assignment(self, exp_class): self._match(TokenType.EQ) self._match(TokenType.ALIAS) - return self.expression(exp_class, this=self._parse_var_or_string() or self._parse_number()) + return self.expression( + exp_class, + this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), + ) def _parse_partitioned_by(self): self._match(TokenType.EQ) @@ -881,7 +897,7 @@ class Parser(metaclass=_Parser): ) def _parse_distkey(self): - return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_var)) + return 
self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) def _parse_create_like(self): table = self._parse_table(schema=True) @@ -898,7 +914,7 @@ class Parser(metaclass=_Parser): def _parse_sortkey(self, compound=False): return self.expression( - exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_var), compound=compound + exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound ) def _parse_character_set(self, default=False): @@ -929,23 +945,11 @@ class Parser(metaclass=_Parser): properties = [] while True: - if self._match(TokenType.WITH): - properties.extend(self._parse_wrapped_csv(self._parse_property)) - elif self._match(TokenType.PROPERTIES): - properties.extend( - self._parse_wrapped_csv( - lambda: self.expression( - exp.Property, - this=self._parse_string(), - value=self._match(TokenType.EQ) and self._parse_string(), - ) - ) - ) - else: - identified_property = self._parse_property() - if not identified_property: - break - properties.append(identified_property) + identified_property = self._parse_property() + if not identified_property: + break + for p in ensure_collection(identified_property): + properties.append(p) if properties: return self.expression(exp.Properties, expressions=properties) @@ -963,7 +967,7 @@ class Parser(metaclass=_Parser): exp.Directory, this=self._parse_var_or_string(), local=local, - row_format=self._parse_row_format(), + row_format=self._parse_row_format(match_row=True), ) else: self._match(TokenType.INTO) @@ -978,10 +982,18 @@ class Parser(metaclass=_Parser): overwrite=overwrite, ) - def _parse_row_format(self): - if not self._match_pair(TokenType.ROW, TokenType.FORMAT): + def _parse_row(self): + if not self._match(TokenType.FORMAT): + return None + return self._parse_row_format() + + def _parse_row_format(self, match_row=False): + if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): return None + if self._match_text_seq("SERDE"): + return 
self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) + self._match_text_seq("DELIMITED") kwargs = {} @@ -998,7 +1010,7 @@ class Parser(metaclass=_Parser): kwargs["lines"] = self._parse_string() if self._match_text_seq("NULL", "DEFINED", "AS"): kwargs["null"] = self._parse_string() - return self.expression(exp.RowFormat, **kwargs) + return self.expression(exp.RowFormatDelimitedProperty, **kwargs) def _parse_load_data(self): local = self._match(TokenType.LOCAL) @@ -1032,7 +1044,7 @@ class Parser(metaclass=_Parser): return self.expression( exp.Update, **{ - "this": self._parse_table(schema=True), + "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), "from": self._parse_from(), "where": self._parse_where(), @@ -1183,9 +1195,11 @@ class Parser(metaclass=_Parser): alias=alias, ) - def _parse_table_alias(self): + def _parse_table_alias(self, alias_tokens=None): any_token = self._match(TokenType.ALIAS) - alias = self._parse_id_var(any_token=any_token, tokens=self.TABLE_ALIAS_TOKENS) + alias = self._parse_id_var( + any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS + ) columns = None if self._match(TokenType.L_PAREN): @@ -1337,7 +1351,7 @@ class Parser(metaclass=_Parser): columns=self._parse_expression(), ) - def _parse_table(self, schema=False): + def _parse_table(self, schema=False, alias_tokens=None): lateral = self._parse_lateral() if lateral: @@ -1372,7 +1386,7 @@ class Parser(metaclass=_Parser): table = self._parse_id_var() if not table: - self.raise_error("Expected table name") + self.raise_error(f"Expected table name but got {self._curr}") this = self.expression( exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() @@ -1384,7 +1398,7 @@ class Parser(metaclass=_Parser): if self.alias_post_tablesample: table_sample = self._parse_table_sample() - alias = self._parse_table_alias() + alias = 
self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) if alias: this.set("alias", alias) @@ -2092,10 +2106,14 @@ class Parser(metaclass=_Parser): kind = self.expression(exp.CheckColumnConstraint, this=constraint) elif self._match(TokenType.COLLATE): kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var()) + elif self._match(TokenType.ENCODE): + kind = self.expression(exp.EncodeColumnConstraint, this=self._parse_var()) elif self._match(TokenType.DEFAULT): kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_conjunction()) elif self._match_pair(TokenType.NOT, TokenType.NULL): kind = exp.NotNullColumnConstraint() + elif self._match(TokenType.NULL): + kind = exp.NotNullColumnConstraint(allow_null=True) elif self._match(TokenType.SCHEMA_COMMENT): kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string()) elif self._match(TokenType.PRIMARY_KEY): @@ -2234,7 +2252,7 @@ class Parser(metaclass=_Parser): return self._parse_window(this) def _parse_extract(self): - this = self._parse_var() or self._parse_type() + this = self._parse_function() or self._parse_var() or self._parse_type() if self._match(TokenType.FROM): return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) @@ -2635,6 +2653,54 @@ class Parser(metaclass=_Parser): parser = self._find_parser(self.SET_PARSERS, self._set_trie) return parser(self) if parser else self._default_parse_set_item() + def _parse_merge(self): + self._match(TokenType.INTO) + target = self._parse_table(schema=True) + + self._match(TokenType.USING) + using = self._parse_table() + + self._match(TokenType.ON) + on = self._parse_conjunction() + + whens = [] + while self._match(TokenType.WHEN): + this = self._parse_conjunction() + self._match(TokenType.THEN) + + if self._match(TokenType.INSERT): + _this = self._parse_star() + if _this: + then = self.expression(exp.Insert, this=_this) + else: + then = self.expression( + exp.Insert, + 
this=self._parse_value(), + expression=self._match(TokenType.VALUES) and self._parse_value(), + ) + elif self._match(TokenType.UPDATE): + expressions = self._parse_star() + if expressions: + then = self.expression(exp.Update, expressions=expressions) + else: + then = self.expression( + exp.Update, + expressions=self._match(TokenType.SET) + and self._parse_csv(self._parse_equality), + ) + elif self._match(TokenType.DELETE): + then = self.expression(exp.Var, this=self._prev.text) + + whens.append(self.expression(exp.When, this=this, then=then)) + + return self.expression( + exp.Merge, + this=target, + using=using, + on=on, + expressions=whens, + ) + def _parse_set(self): return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) |