diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-10-15 13:52:53 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-10-15 13:52:53 +0000 |
commit | 97d3673ec2d668050912aa6aea1816885ca6c5ab (patch) | |
tree | f391e30e039a3d22368e9696e171f759e104c765 /sqlglot/parser.py | |
parent | Adding upstream version 6.3.1. (diff) | |
download | sqlglot-97d3673ec2d668050912aa6aea1816885ca6c5ab.tar.xz sqlglot-97d3673ec2d668050912aa6aea1816885ca6c5ab.zip |
Adding upstream version 7.1.3. (tag: upstream/7.1.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 145 |
1 file changed, 122 insertions, 23 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index c29e520..b378f12 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -135,11 +135,13 @@ class Parser: TokenType.BOTH, TokenType.BUCKET, TokenType.CACHE, + TokenType.CALL, TokenType.COLLATE, TokenType.COMMIT, TokenType.CONSTRAINT, TokenType.DEFAULT, TokenType.DELETE, + TokenType.DESCRIBE, TokenType.DETERMINISTIC, TokenType.EXECUTE, TokenType.ENGINE, @@ -160,6 +162,7 @@ class Parser: TokenType.LAZY, TokenType.LANGUAGE, TokenType.LEADING, + TokenType.LOCAL, TokenType.LOCATION, TokenType.MATERIALIZED, TokenType.NATURAL, @@ -176,6 +179,7 @@ class Parser: TokenType.REFERENCES, TokenType.RETURNS, TokenType.ROWS, + TokenType.SCHEMA, TokenType.SCHEMA_COMMENT, TokenType.SEED, TokenType.SEMI, @@ -294,6 +298,11 @@ class Parser: COLUMN_OPERATORS = { TokenType.DOT: None, + TokenType.DCOLON: lambda self, this, to: self.expression( + exp.Cast, + this=this, + to=to, + ), TokenType.ARROW: lambda self, this, path: self.expression( exp.JSONExtract, this=this, @@ -342,8 +351,10 @@ class Parser: STATEMENT_PARSERS = { TokenType.CREATE: lambda self: self._parse_create(), + TokenType.DESCRIBE: lambda self: self._parse_describe(), TokenType.DROP: lambda self: self._parse_drop(), TokenType.INSERT: lambda self: self._parse_insert(), + TokenType.LOAD_DATA: lambda self: self._parse_load_data(), TokenType.UPDATE: lambda self: self._parse_update(), TokenType.DELETE: lambda self: self._parse_delete(), TokenType.CACHE: lambda self: self._parse_cache(), @@ -449,7 +460,14 @@ class Parser: MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) - CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX, TokenType.PROCEDURE} + CREATABLES = { + TokenType.TABLE, + TokenType.VIEW, + TokenType.FUNCTION, + TokenType.INDEX, + TokenType.PROCEDURE, + TokenType.SCHEMA, + } STRICT_CAST = True @@ -650,7 +668,7 @@ class Parser: materialized = self._match(TokenType.MATERIALIZED) kind = self._match_set(self.CREATABLES) and 
self._prev.text if not kind: - self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE") + self.raise_error(f"Expected {self.CREATABLES}") return return self.expression( @@ -677,7 +695,7 @@ class Parser: create_token = self._match_set(self.CREATABLES) and self._prev if not create_token: - self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE") + self.raise_error(f"Expected {self.CREATABLES}") return exists = self._parse_exists(not_=True) @@ -692,7 +710,7 @@ class Parser: expression = self._parse_select_or_expression() elif create_token.token_type == TokenType.INDEX: this = self._parse_index() - elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW): + elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW, TokenType.SCHEMA): this = self._parse_table(schema=True) properties = self._parse_properties() if self._match(TokenType.ALIAS): @@ -836,19 +854,74 @@ class Parser: return self.expression(exp.Properties, expressions=properties) return None + def _parse_describe(self): + self._match(TokenType.TABLE) + + return self.expression(exp.Describe, this=self._parse_id_var()) + def _parse_insert(self): overwrite = self._match(TokenType.OVERWRITE) - self._match(TokenType.INTO) - self._match(TokenType.TABLE) + local = self._match(TokenType.LOCAL) + if self._match_text("DIRECTORY"): + this = self.expression( + exp.Directory, + this=self._parse_var_or_string(), + local=local, + row_format=self._parse_row_format(), + ) + else: + self._match(TokenType.INTO) + self._match(TokenType.TABLE) + this = self._parse_table(schema=True) return self.expression( exp.Insert, - this=self._parse_table(schema=True), + this=this, exists=self._parse_exists(), partition=self._parse_partition(), expression=self._parse_select(nested=True), overwrite=overwrite, ) + def _parse_row_format(self): + if not self._match_pair(TokenType.ROW, TokenType.FORMAT): + return None + + self._match_text("DELIMITED") + + kwargs = {} + + if self._match_text("FIELDS", 
"TERMINATED", "BY"): + kwargs["fields"] = self._parse_string() + if self._match_text("ESCAPED", "BY"): + kwargs["escaped"] = self._parse_string() + if self._match_text("COLLECTION", "ITEMS", "TERMINATED", "BY"): + kwargs["collection_items"] = self._parse_string() + if self._match_text("MAP", "KEYS", "TERMINATED", "BY"): + kwargs["map_keys"] = self._parse_string() + if self._match_text("LINES", "TERMINATED", "BY"): + kwargs["lines"] = self._parse_string() + if self._match_text("NULL", "DEFINED", "AS"): + kwargs["null"] = self._parse_string() + return self.expression(exp.RowFormat, **kwargs) + + def _parse_load_data(self): + local = self._match(TokenType.LOCAL) + self._match_text("INPATH") + inpath = self._parse_string() + overwrite = self._match(TokenType.OVERWRITE) + self._match_pair(TokenType.INTO, TokenType.TABLE) + + return self.expression( + exp.LoadData, + this=self._parse_table(schema=True), + local=local, + overwrite=overwrite, + inpath=inpath, + partition=self._parse_partition(), + input_format=self._match_text("INPUTFORMAT") and self._parse_string(), + serde=self._match_text("SERDE") and self._parse_string(), + ) + def _parse_delete(self): self._match(TokenType.FROM) @@ -1484,6 +1557,14 @@ class Parser: if self._match_set(self.RANGE_PARSERS): this = self.RANGE_PARSERS[self._prev.token_type](self, this) + elif self._match(TokenType.ISNULL): + this = self.expression(exp.Is, this=this, expression=exp.Null()) + + # Postgres supports ISNULL and NOTNULL for conditions. 
+ # https://blog.andreiavram.ro/postgresql-null-composite-type/ + if self._match(TokenType.NOTNULL): + this = self.expression(exp.Is, this=this, expression=exp.Null()) + this = self.expression(exp.Not, this=this) if negate: this = self.expression(exp.Not, this=this) @@ -1582,12 +1663,6 @@ class Parser: return self._parse_column() return type_token - while self._match(TokenType.DCOLON): - type_token = self._parse_types() - if not type_token: - self.raise_error("Expected type") - this = self.expression(exp.Cast, this=this, to=type_token) - return this def _parse_types(self): @@ -1601,6 +1676,11 @@ class Parser: is_struct = type_token == TokenType.STRUCT expressions = None + if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): + return exp.DataType( + this=exp.DataType.Type.ARRAY, expressions=[exp.DataType.build(type_token.value)], nested=True + ) + if self._match(TokenType.L_BRACKET): self._retreat(index) return None @@ -1611,7 +1691,7 @@ class Parser: elif nested: expressions = self._parse_csv(self._parse_types) else: - expressions = self._parse_csv(self._parse_type) + expressions = self._parse_csv(self._parse_conjunction) if not expressions: self._retreat(index) @@ -1677,8 +1757,17 @@ class Parser: this = self._parse_bracket(this) while self._match_set(self.COLUMN_OPERATORS): - op = self.COLUMN_OPERATORS.get(self._prev.token_type) - field = self._parse_star() or self._parse_function() or self._parse_id_var() + op_token = self._prev.token_type + op = self.COLUMN_OPERATORS.get(op_token) + + if op_token == TokenType.DCOLON: + field = self._parse_types() + if not field: + self.raise_error("Expected type") + elif op: + field = exp.Literal.string(self._advance() or self._prev.text) + else: + field = self._parse_star() or self._parse_function() or self._parse_id_var() if isinstance(field, exp.Func): # bigquery allows function calls like x.y.count(...) 
@@ -1687,7 +1776,7 @@ class Parser: this = self._replace_columns_with_dots(this) if op: - this = op(self, this, exp.Literal.string(field.name)) + this = op(self, this, field) elif isinstance(this, exp.Column) and not this.table: this = self.expression(exp.Column, this=field, table=this.this) else: @@ -1808,11 +1897,10 @@ class Parser: if not self._match(TokenType.ARROW): self._retreat(index) - distinct = self._match(TokenType.DISTINCT) - this = self._parse_conjunction() - - if distinct: - this = self.expression(exp.Distinct, this=this) + if self._match(TokenType.DISTINCT): + this = self.expression(exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)) + else: + this = self._parse_conjunction() if self._match(TokenType.IGNORE_NULLS): this = self.expression(exp.IgnoreNulls, this=this) @@ -2112,6 +2200,8 @@ class Parser: this = self.expression(exp.Filter, this=this, expression=self._parse_where()) self._match_r_paren() + # T-SQL allows the OVER (...) syntax after WITHIN GROUP. + # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 if self._match(TokenType.WITHIN_GROUP): self._match_l_paren() this = self.expression( @@ -2120,7 +2210,6 @@ class Parser: expression=self._parse_order(), ) self._match_r_paren() - return this # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER # Some dialects choose to implement and some do not. @@ -2366,6 +2455,16 @@ class Parser: if not self._match(TokenType.R_PAREN): self.raise_error("Expecting )") + def _match_text(self, *texts): + index = self._index + for text in texts: + if self._curr and self._curr.text.upper() == text: + self._advance() + else: + self._retreat(index) + return False + return True + def _replace_columns_with_dots(self, this): if isinstance(this, exp.Dot): exp.replace_children(this, self._replace_columns_with_dots) |