1 files changed, 122 insertions, 23 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index c29e520..b378f12 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -135,11 +135,13 @@ class Parser:
         TokenType.BOTH,
         TokenType.BUCKET,
         TokenType.CACHE,
+        TokenType.CALL,
         TokenType.COLLATE,
         TokenType.COMMIT,
         TokenType.CONSTRAINT,
         TokenType.DEFAULT,
         TokenType.DELETE,
+        TokenType.DESCRIBE,
         TokenType.DETERMINISTIC,
         TokenType.EXECUTE,
         TokenType.ENGINE,
@@ -160,6 +162,7 @@ class Parser:
         TokenType.LAZY,
         TokenType.LANGUAGE,
         TokenType.LEADING,
+        TokenType.LOCAL,
         TokenType.LOCATION,
         TokenType.MATERIALIZED,
         TokenType.NATURAL,
@@ -176,6 +179,7 @@ class Parser:
         TokenType.REFERENCES,
         TokenType.RETURNS,
         TokenType.ROWS,
+        TokenType.SCHEMA,
         TokenType.SCHEMA_COMMENT,
         TokenType.SEED,
         TokenType.SEMI,
@@ -294,6 +298,11 @@ class Parser:
 
     COLUMN_OPERATORS = {
         TokenType.DOT: None,
+        TokenType.DCOLON: lambda self, this, to: self.expression(
+            exp.Cast,
+            this=this,
+            to=to,
+        ),
         TokenType.ARROW: lambda self, this, path: self.expression(
             exp.JSONExtract,
             this=this,
@@ -342,8 +351,10 @@ class Parser:
 
     STATEMENT_PARSERS = {
         TokenType.CREATE: lambda self: self._parse_create(),
+        TokenType.DESCRIBE: lambda self: self._parse_describe(),
         TokenType.DROP: lambda self: self._parse_drop(),
         TokenType.INSERT: lambda self: self._parse_insert(),
+        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
         TokenType.UPDATE: lambda self: self._parse_update(),
         TokenType.DELETE: lambda self: self._parse_delete(),
         TokenType.CACHE: lambda self: self._parse_cache(),
@@ -449,7 +460,14 @@ class Parser:
 
     MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 
-    CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX, TokenType.PROCEDURE}
+    CREATABLES = {
+        TokenType.TABLE,
+        TokenType.VIEW,
+        TokenType.FUNCTION,
+        TokenType.INDEX,
+        TokenType.PROCEDURE,
+        TokenType.SCHEMA,
+    }
 
     STRICT_CAST = True
 
@@ -650,7 +668,7 @@ class Parser:
         materialized = self._match(TokenType.MATERIALIZED)
         kind = self._match_set(self.CREATABLES) and self._prev.text
         if not kind:
-            self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE")
+            self.raise_error(f"Expected {self.CREATABLES}")
             return
 
         return self.expression(
@@ -677,7 +695,7 @@ class Parser:
         create_token = self._match_set(self.CREATABLES) and self._prev
 
         if not create_token:
-            self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE")
+            self.raise_error(f"Expected {self.CREATABLES}")
             return
 
         exists = self._parse_exists(not_=True)
@@ -692,7 +710,7 @@ class Parser:
                 expression = self._parse_select_or_expression()
         elif create_token.token_type == TokenType.INDEX:
             this = self._parse_index()
-        elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
+        elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW, TokenType.SCHEMA):
             this = self._parse_table(schema=True)
             properties = self._parse_properties()
             if self._match(TokenType.ALIAS):
@@ -836,19 +854,74 @@ class Parser:
             return self.expression(exp.Properties, expressions=properties)
         return None
 
+    def _parse_describe(self):
+        self._match(TokenType.TABLE)  # optional TABLE keyword; the match result is not needed
+        described = self._parse_id_var()
+        return self.expression(exp.Describe, this=described)
+
     def _parse_insert(self):
         overwrite = self._match(TokenType.OVERWRITE)
-        self._match(TokenType.INTO)
-        self._match(TokenType.TABLE)
+        local = self._match(TokenType.LOCAL)  # consumed here even if DIRECTORY does not follow
+        if self._match_text("DIRECTORY"):  # insert target is a directory instead of a table
+            this = self.expression(
+                exp.Directory,
+                this=self._parse_var_or_string(),
+                local=local,
+                row_format=self._parse_row_format(),  # None when no ROW FORMAT pair follows
+            )
+        else:
+            self._match(TokenType.INTO)  # INTO and TABLE are both optional; results are ignored
+            self._match(TokenType.TABLE)
+            this = self._parse_table(schema=True)
         return self.expression(
             exp.Insert,
-            this=self._parse_table(schema=True),
+            this=this,  # table or exp.Directory target built above, before the clauses below are parsed
             exists=self._parse_exists(),
             partition=self._parse_partition(),
             expression=self._parse_select(nested=True),
             overwrite=overwrite,
         )
 
+    def _parse_row_format(self):  # returns exp.RowFormat, or None when ROW FORMAT does not follow
+        if not self._match_pair(TokenType.ROW, TokenType.FORMAT):
+            return None
+
+        self._match_text("DELIMITED")  # result unused: parsing continues either way
+        parsed = {}
+        if self._match_text("FIELDS", "TERMINATED", "BY"):
+            parsed["fields"] = self._parse_string()
+            if self._match_text("ESCAPED", "BY"):  # only tried right after a FIELDS clause
+                parsed["escaped"] = self._parse_string()
+        # Remaining clauses are tried once each, in this fixed order.
+        remaining = (
+            ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
+            ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
+            ("lines", ("LINES", "TERMINATED", "BY")),
+            ("null", ("NULL", "DEFINED", "AS")),
+        )
+        for arg, keywords in remaining:
+            if self._match_text(*keywords):
+                parsed[arg] = self._parse_string()
+        return self.expression(exp.RowFormat, **parsed)
+
+    def _parse_load_data(self):  # STATEMENT_PARSERS entry for TokenType.LOAD_DATA; builds exp.LoadData
+        local = self._match(TokenType.LOCAL)
+        self._match_text("INPATH")  # result ignored, so a missing INPATH keyword is not reported here
+        inpath = self._parse_string()
+        overwrite = self._match(TokenType.OVERWRITE)
+        self._match_pair(TokenType.INTO, TokenType.TABLE)  # result ignored as well
+
+        return self.expression(
+            exp.LoadData,
+            this=self._parse_table(schema=True),  # the calls below run top to bottom, in clause order
+            local=local,
+            overwrite=overwrite,
+            inpath=inpath,
+            partition=self._parse_partition(),
+            input_format=self._match_text("INPUTFORMAT") and self._parse_string(),  # False if keyword absent
+            serde=self._match_text("SERDE") and self._parse_string(),  # False if keyword absent
+        )
+
     def _parse_delete(self):
         self._match(TokenType.FROM)
 
@@ -1484,6 +1557,14 @@ class Parser:
 
         if self._match_set(self.RANGE_PARSERS):
             this = self.RANGE_PARSERS[self._prev.token_type](self, this)
+        elif self._match(TokenType.ISNULL):
+            this = self.expression(exp.Is, this=this, expression=exp.Null())
+
+        # Postgres supports ISNULL and NOTNULL for conditions.
+        # https://blog.andreiavram.ro/postgresql-null-composite-type/
+        if self._match(TokenType.NOTNULL):
+            this = self.expression(exp.Is, this=this, expression=exp.Null())
+            this = self.expression(exp.Not, this=this)
 
         if negate:
             this = self.expression(exp.Not, this=this)
@@ -1582,12 +1663,6 @@ class Parser:
                 return self._parse_column()
             return type_token
 
-        while self._match(TokenType.DCOLON):
-            type_token = self._parse_types()
-            if not type_token:
-                self.raise_error("Expected type")
-            this = self.expression(exp.Cast, this=this, to=type_token)
-
         return this
 
     def _parse_types(self):
@@ -1601,6 +1676,11 @@ class Parser:
         is_struct = type_token == TokenType.STRUCT
         expressions = None
 
+        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
+            return exp.DataType(
+                this=exp.DataType.Type.ARRAY, expressions=[exp.DataType.build(type_token.value)], nested=True
+            )
+
         if self._match(TokenType.L_BRACKET):
             self._retreat(index)
             return None
@@ -1611,7 +1691,7 @@ class Parser:
             elif nested:
                 expressions = self._parse_csv(self._parse_types)
             else:
-                expressions = self._parse_csv(self._parse_type)
+                expressions = self._parse_csv(self._parse_conjunction)
 
             if not expressions:
                 self._retreat(index)
@@ -1677,8 +1757,17 @@ class Parser:
         this = self._parse_bracket(this)
 
         while self._match_set(self.COLUMN_OPERATORS):
-            op = self.COLUMN_OPERATORS.get(self._prev.token_type)
-            field = self._parse_star() or self._parse_function() or self._parse_id_var()
+            op_token = self._prev.token_type
+            op = self.COLUMN_OPERATORS.get(op_token)
+
+            if op_token == TokenType.DCOLON:
+                field = self._parse_types()
+                if not field:
+                    self.raise_error("Expected type")
+            elif op:
+                field = exp.Literal.string(self._advance() or self._prev.text)
+            else:
+                field = self._parse_star() or self._parse_function() or self._parse_id_var()
 
             if isinstance(field, exp.Func):
                 # bigquery allows function calls like x.y.count(...)
@@ -1687,7 +1776,7 @@ class Parser:
                 this = self._replace_columns_with_dots(this)
 
             if op:
-                this = op(self, this, exp.Literal.string(field.name))
+                this = op(self, this, field)
             elif isinstance(this, exp.Column) and not this.table:
                 this = self.expression(exp.Column, this=field, table=this.this)
             else:
@@ -1808,11 +1897,10 @@ class Parser:
         if not self._match(TokenType.ARROW):
             self._retreat(index)
 
-            distinct = self._match(TokenType.DISTINCT)
-            this = self._parse_conjunction()
-
-            if distinct:
-                this = self.expression(exp.Distinct, this=this)
+            if self._match(TokenType.DISTINCT):
+                this = self.expression(exp.Distinct, expressions=self._parse_csv(self._parse_conjunction))
+            else:
+                this = self._parse_conjunction()
 
             if self._match(TokenType.IGNORE_NULLS):
                 this = self.expression(exp.IgnoreNulls, this=this)
@@ -2112,6 +2200,8 @@ class Parser:
             this = self.expression(exp.Filter, this=this, expression=self._parse_where())
             self._match_r_paren()
 
+        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
+        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
         if self._match(TokenType.WITHIN_GROUP):
             self._match_l_paren()
             this = self.expression(
@@ -2120,7 +2210,6 @@ class Parser:
                 expression=self._parse_order(),
             )
             self._match_r_paren()
-            return this
 
         # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
         # Some dialects choose to implement and some do not.
@@ -2366,6 +2455,16 @@ class Parser:
         if not self._match(TokenType.R_PAREN):
             self.raise_error("Expecting )")
 
+    def _match_text(self, *texts):  # all-or-nothing match of consecutive tokens by upper-cased text
+        start = self._index
+        for expected in texts:
+            token = self._curr
+            if not token or token.text.upper() != expected:
+                self._retreat(start)  # undo any partial progress before reporting failure
+                return False
+            self._advance()
+        return True
+
     def _replace_columns_with_dots(self, this):
         if isinstance(this, exp.Dot):
             exp.replace_children(this, self._replace_columns_with_dots)