summaryrefslogtreecommitdiffstats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--sqlglot/parser.py145
1 files changed, 122 insertions, 23 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index c29e520..b378f12 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -135,11 +135,13 @@ class Parser:
TokenType.BOTH,
TokenType.BUCKET,
TokenType.CACHE,
+ TokenType.CALL,
TokenType.COLLATE,
TokenType.COMMIT,
TokenType.CONSTRAINT,
TokenType.DEFAULT,
TokenType.DELETE,
+ TokenType.DESCRIBE,
TokenType.DETERMINISTIC,
TokenType.EXECUTE,
TokenType.ENGINE,
@@ -160,6 +162,7 @@ class Parser:
TokenType.LAZY,
TokenType.LANGUAGE,
TokenType.LEADING,
+ TokenType.LOCAL,
TokenType.LOCATION,
TokenType.MATERIALIZED,
TokenType.NATURAL,
@@ -176,6 +179,7 @@ class Parser:
TokenType.REFERENCES,
TokenType.RETURNS,
TokenType.ROWS,
+ TokenType.SCHEMA,
TokenType.SCHEMA_COMMENT,
TokenType.SEED,
TokenType.SEMI,
@@ -294,6 +298,11 @@ class Parser:
COLUMN_OPERATORS = {
TokenType.DOT: None,
+ TokenType.DCOLON: lambda self, this, to: self.expression(
+ exp.Cast,
+ this=this,
+ to=to,
+ ),
TokenType.ARROW: lambda self, this, path: self.expression(
exp.JSONExtract,
this=this,
@@ -342,8 +351,10 @@ class Parser:
STATEMENT_PARSERS = {
TokenType.CREATE: lambda self: self._parse_create(),
+ TokenType.DESCRIBE: lambda self: self._parse_describe(),
TokenType.DROP: lambda self: self._parse_drop(),
TokenType.INSERT: lambda self: self._parse_insert(),
+ TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
TokenType.UPDATE: lambda self: self._parse_update(),
TokenType.DELETE: lambda self: self._parse_delete(),
TokenType.CACHE: lambda self: self._parse_cache(),
@@ -449,7 +460,14 @@ class Parser:
MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
- CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX, TokenType.PROCEDURE}
+ CREATABLES = {
+ TokenType.TABLE,
+ TokenType.VIEW,
+ TokenType.FUNCTION,
+ TokenType.INDEX,
+ TokenType.PROCEDURE,
+ TokenType.SCHEMA,
+ }
STRICT_CAST = True
@@ -650,7 +668,7 @@ class Parser:
materialized = self._match(TokenType.MATERIALIZED)
kind = self._match_set(self.CREATABLES) and self._prev.text
if not kind:
- self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE")
+ self.raise_error(f"Expected {self.CREATABLES}")
return
return self.expression(
@@ -677,7 +695,7 @@ class Parser:
create_token = self._match_set(self.CREATABLES) and self._prev
if not create_token:
- self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE")
+ self.raise_error(f"Expected {self.CREATABLES}")
return
exists = self._parse_exists(not_=True)
@@ -692,7 +710,7 @@ class Parser:
expression = self._parse_select_or_expression()
elif create_token.token_type == TokenType.INDEX:
this = self._parse_index()
- elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
+ elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW, TokenType.SCHEMA):
this = self._parse_table(schema=True)
properties = self._parse_properties()
if self._match(TokenType.ALIAS):
@@ -836,19 +854,74 @@ class Parser:
return self.expression(exp.Properties, expressions=properties)
return None
+ def _parse_describe(self):
+ self._match(TokenType.TABLE)
+
+ return self.expression(exp.Describe, this=self._parse_id_var())
+
def _parse_insert(self):
overwrite = self._match(TokenType.OVERWRITE)
- self._match(TokenType.INTO)
- self._match(TokenType.TABLE)
+ local = self._match(TokenType.LOCAL)
+ if self._match_text("DIRECTORY"):
+ this = self.expression(
+ exp.Directory,
+ this=self._parse_var_or_string(),
+ local=local,
+ row_format=self._parse_row_format(),
+ )
+ else:
+ self._match(TokenType.INTO)
+ self._match(TokenType.TABLE)
+ this = self._parse_table(schema=True)
return self.expression(
exp.Insert,
- this=self._parse_table(schema=True),
+ this=this,
exists=self._parse_exists(),
partition=self._parse_partition(),
expression=self._parse_select(nested=True),
overwrite=overwrite,
)
+ def _parse_row_format(self):
+ if not self._match_pair(TokenType.ROW, TokenType.FORMAT):
+ return None
+
+ self._match_text("DELIMITED")
+
+ kwargs = {}
+
+ if self._match_text("FIELDS", "TERMINATED", "BY"):
+ kwargs["fields"] = self._parse_string()
+ if self._match_text("ESCAPED", "BY"):
+ kwargs["escaped"] = self._parse_string()
+ if self._match_text("COLLECTION", "ITEMS", "TERMINATED", "BY"):
+ kwargs["collection_items"] = self._parse_string()
+ if self._match_text("MAP", "KEYS", "TERMINATED", "BY"):
+ kwargs["map_keys"] = self._parse_string()
+ if self._match_text("LINES", "TERMINATED", "BY"):
+ kwargs["lines"] = self._parse_string()
+ if self._match_text("NULL", "DEFINED", "AS"):
+ kwargs["null"] = self._parse_string()
+ return self.expression(exp.RowFormat, **kwargs)
+
+ def _parse_load_data(self):
+ local = self._match(TokenType.LOCAL)
+ self._match_text("INPATH")
+ inpath = self._parse_string()
+ overwrite = self._match(TokenType.OVERWRITE)
+ self._match_pair(TokenType.INTO, TokenType.TABLE)
+
+ return self.expression(
+ exp.LoadData,
+ this=self._parse_table(schema=True),
+ local=local,
+ overwrite=overwrite,
+ inpath=inpath,
+ partition=self._parse_partition(),
+ input_format=self._match_text("INPUTFORMAT") and self._parse_string(),
+ serde=self._match_text("SERDE") and self._parse_string(),
+ )
+
def _parse_delete(self):
self._match(TokenType.FROM)
@@ -1484,6 +1557,14 @@ class Parser:
if self._match_set(self.RANGE_PARSERS):
this = self.RANGE_PARSERS[self._prev.token_type](self, this)
+ elif self._match(TokenType.ISNULL):
+ this = self.expression(exp.Is, this=this, expression=exp.Null())
+
+ # Postgres supports ISNULL and NOTNULL for conditions.
+ # https://blog.andreiavram.ro/postgresql-null-composite-type/
+ if self._match(TokenType.NOTNULL):
+ this = self.expression(exp.Is, this=this, expression=exp.Null())
+ this = self.expression(exp.Not, this=this)
if negate:
this = self.expression(exp.Not, this=this)
@@ -1582,12 +1663,6 @@ class Parser:
return self._parse_column()
return type_token
- while self._match(TokenType.DCOLON):
- type_token = self._parse_types()
- if not type_token:
- self.raise_error("Expected type")
- this = self.expression(exp.Cast, this=this, to=type_token)
-
return this
def _parse_types(self):
@@ -1601,6 +1676,11 @@ class Parser:
is_struct = type_token == TokenType.STRUCT
expressions = None
+ if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
+ return exp.DataType(
+ this=exp.DataType.Type.ARRAY, expressions=[exp.DataType.build(type_token.value)], nested=True
+ )
+
if self._match(TokenType.L_BRACKET):
self._retreat(index)
return None
@@ -1611,7 +1691,7 @@ class Parser:
elif nested:
expressions = self._parse_csv(self._parse_types)
else:
- expressions = self._parse_csv(self._parse_type)
+ expressions = self._parse_csv(self._parse_conjunction)
if not expressions:
self._retreat(index)
@@ -1677,8 +1757,17 @@ class Parser:
this = self._parse_bracket(this)
while self._match_set(self.COLUMN_OPERATORS):
- op = self.COLUMN_OPERATORS.get(self._prev.token_type)
- field = self._parse_star() or self._parse_function() or self._parse_id_var()
+ op_token = self._prev.token_type
+ op = self.COLUMN_OPERATORS.get(op_token)
+
+ if op_token == TokenType.DCOLON:
+ field = self._parse_types()
+ if not field:
+ self.raise_error("Expected type")
+ elif op:
+ field = exp.Literal.string(self._advance() or self._prev.text)
+ else:
+ field = self._parse_star() or self._parse_function() or self._parse_id_var()
if isinstance(field, exp.Func):
# bigquery allows function calls like x.y.count(...)
@@ -1687,7 +1776,7 @@ class Parser:
this = self._replace_columns_with_dots(this)
if op:
- this = op(self, this, exp.Literal.string(field.name))
+ this = op(self, this, field)
elif isinstance(this, exp.Column) and not this.table:
this = self.expression(exp.Column, this=field, table=this.this)
else:
@@ -1808,11 +1897,10 @@ class Parser:
if not self._match(TokenType.ARROW):
self._retreat(index)
- distinct = self._match(TokenType.DISTINCT)
- this = self._parse_conjunction()
-
- if distinct:
- this = self.expression(exp.Distinct, this=this)
+ if self._match(TokenType.DISTINCT):
+ this = self.expression(exp.Distinct, expressions=self._parse_csv(self._parse_conjunction))
+ else:
+ this = self._parse_conjunction()
if self._match(TokenType.IGNORE_NULLS):
this = self.expression(exp.IgnoreNulls, this=this)
@@ -2112,6 +2200,8 @@ class Parser:
this = self.expression(exp.Filter, this=this, expression=self._parse_where())
self._match_r_paren()
+ # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
+ # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
if self._match(TokenType.WITHIN_GROUP):
self._match_l_paren()
this = self.expression(
@@ -2120,7 +2210,6 @@ class Parser:
expression=self._parse_order(),
)
self._match_r_paren()
- return this
# SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
# Some dialects choose to implement and some do not.
@@ -2366,6 +2455,16 @@ class Parser:
if not self._match(TokenType.R_PAREN):
self.raise_error("Expecting )")
+ def _match_text(self, *texts):
+ index = self._index
+ for text in texts:
+ if self._curr and self._curr.text.upper() == text:
+ self._advance()
+ else:
+ self._retreat(index)
+ return False
+ return True
+
def _replace_columns_with_dots(self, this):
if isinstance(this, exp.Dot):
exp.replace_children(this, self._replace_columns_with_dots)