Adding upstream version 23.7.0. (tag: upstream/23.7.0)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-08 08:11:50 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-08 08:11:50 +0000
commit: 8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2 (patch)
tree: 2e29f131dff77b31e84c957266de8f18655b6f88 /sqlglot/parser.py
parent: Adding upstream version 22.2.0. (diff)
download: sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.tar.xz
sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.zip
1 files changed, 354 insertions, 139 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 49dac2e..91d8d13 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -15,6 +15,8 @@ if t.TYPE_CHECKING:
     from sqlglot._typing import E, Lit
     from sqlglot.dialects.dialect import Dialect, DialectType
 
+    T = t.TypeVar("T")
+
 logger = logging.getLogger("sqlglot")
 
 OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]
@@ -119,6 +121,9 @@ class Parser(metaclass=_Parser):
         "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
         "LIKE": build_like,
         "LOG": build_logarithm,
+        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
+        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
+        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
         "TIME_TO_TIME_STR": lambda args: exp.Cast(
             this=seq_get(args, 0),
             to=exp.DataType(this=exp.DataType.Type.TEXT),
@@ -144,6 +149,7 @@ class Parser(metaclass=_Parser):
 
     STRUCT_TYPE_TOKENS = {
         TokenType.NESTED,
+        TokenType.OBJECT,
         TokenType.STRUCT,
     }
 
@@ -258,6 +264,7 @@ class Parser(metaclass=_Parser):
         TokenType.IPV6,
         TokenType.UNKNOWN,
         TokenType.NULL,
+        TokenType.NAME,
         *ENUM_TYPE_TOKENS,
         *NESTED_TYPE_TOKENS,
         *AGGREGATE_TYPE_TOKENS,
@@ -291,6 +298,7 @@ class Parser(metaclass=_Parser):
         TokenType.VIEW,
         TokenType.MODEL,
         TokenType.DICTIONARY,
+        TokenType.SEQUENCE,
         TokenType.STORAGE_INTEGRATION,
     }
 
@@ -310,6 +318,7 @@ class Parser(metaclass=_Parser):
         TokenType.ANTI,
         TokenType.APPLY,
         TokenType.ASC,
+        TokenType.ASOF,
         TokenType.AUTO_INCREMENT,
         TokenType.BEGIN,
         TokenType.BPCHAR,
@@ -398,6 +407,8 @@ class Parser(metaclass=_Parser):
         TokenType.WINDOW,
     }
 
+    ALIAS_TOKENS = ID_VAR_TOKENS
+
     COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 
     UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
@@ -433,6 +444,7 @@ class Parser(metaclass=_Parser):
         TokenType.VAR,
         TokenType.LEFT,
         TokenType.RIGHT,
+        TokenType.SEQUENCE,
         TokenType.DATE,
         TokenType.DATETIME,
         TokenType.TABLE,
@@ -505,8 +517,9 @@ class Parser(metaclass=_Parser):
     }
 
     JOIN_METHODS = {
-        TokenType.NATURAL,
         TokenType.ASOF,
+        TokenType.NATURAL,
+        TokenType.POSITIONAL,
     }
 
     JOIN_SIDES = {
@@ -611,8 +624,8 @@ class Parser(metaclass=_Parser):
         TokenType.ALTER: lambda self: self._parse_alter(),
         TokenType.BEGIN: lambda self: self._parse_transaction(),
         TokenType.CACHE: lambda self: self._parse_cache(),
-        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
         TokenType.COMMENT: lambda self: self._parse_comment(),
+        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
         TokenType.CREATE: lambda self: self._parse_create(),
         TokenType.DELETE: lambda self: self._parse_delete(),
         TokenType.DESC: lambda self: self._parse_describe(),
@@ -627,9 +640,9 @@ class Parser(metaclass=_Parser):
         TokenType.REFRESH: lambda self: self._parse_refresh(),
         TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
         TokenType.SET: lambda self: self._parse_set(),
+        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
         TokenType.UNCACHE: lambda self: self._parse_uncache(),
         TokenType.UPDATE: lambda self: self._parse_update(),
-        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
         TokenType.USE: lambda self: self.expression(
             exp.Use,
             kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
@@ -714,6 +727,9 @@ class Parser(metaclass=_Parser):
         "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
         "AUTO": lambda self: self._parse_auto_property(),
         "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
+        "BACKUP": lambda self: self.expression(
+            exp.BackupProperty, this=self._parse_var(any_token=True)
+        ),
         "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
         "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
         "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
@@ -739,7 +755,9 @@ class Parser(metaclass=_Parser):
         "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
         "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
         "FREESPACE": lambda self: self._parse_freespace(),
+        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
         "HEAP": lambda self: self.expression(exp.HeapProperty),
+        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
         "IMMUTABLE": lambda self: self.expression(
             exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
         ),
@@ -782,6 +800,7 @@ class Parser(metaclass=_Parser):
         "SETTINGS": lambda self: self.expression(
             exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
         ),
+        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
         "SORTKEY": lambda self: self._parse_sortkey(),
         "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
         "STABLE": lambda self: self.expression(
@@ -789,7 +808,7 @@ class Parser(metaclass=_Parser):
         ),
         "STORED": lambda self: self._parse_stored(),
         "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
-        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
+        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
         "TEMP": lambda self: self.expression(exp.TemporaryProperty),
         "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
         "TO": lambda self: self._parse_to_table(),
@@ -799,6 +818,7 @@ class Parser(metaclass=_Parser):
         ),
         "TTL": lambda self: self._parse_ttl(),
         "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
         "VOLATILE": lambda self: self._parse_volatile_property(),
         "WITH": lambda self: self._parse_with_property(),
     }
@@ -832,6 +852,9 @@ class Parser(metaclass=_Parser):
             exp.DefaultColumnConstraint, this=self._parse_bitwise()
         ),
         "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
+        "EXCLUDE": lambda self: self.expression(
+            exp.ExcludeColumnConstraint, this=self._parse_index_params()
+        ),
         "FOREIGN KEY": lambda self: self._parse_foreign_key(),
         "FORMAT": lambda self: self.expression(
             exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
@@ -858,7 +881,7 @@ class Parser(metaclass=_Parser):
         "UNIQUE": lambda self: self._parse_unique(),
         "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
         "WITH": lambda self: self.expression(
-            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
+            exp.Properties, expressions=self._parse_wrapped_properties()
         ),
     }
 
@@ -871,7 +894,15 @@ class Parser(metaclass=_Parser):
         "RENAME": lambda self: self._parse_alter_table_rename(),
     }
 
-    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}
+    SCHEMA_UNNAMED_CONSTRAINTS = {
+        "CHECK",
+        "EXCLUDE",
+        "FOREIGN KEY",
+        "LIKE",
+        "PERIOD",
+        "PRIMARY KEY",
+        "UNIQUE",
+    }
 
     NO_PAREN_FUNCTION_PARSERS = {
         "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
@@ -966,18 +997,54 @@ class Parser(metaclass=_Parser):
         "READ": ("WRITE", "ONLY"),
     }
 
+    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
+        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
+    )
+    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
+
+    CREATE_SEQUENCE: OPTIONS_TYPE = {
+        "SCALE": ("EXTEND", "NOEXTEND"),
+        "SHARD": ("EXTEND", "NOEXTEND"),
+        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
+        **dict.fromkeys(
+            (
+                "SESSION",
+                "GLOBAL",
+                "KEEP",
+                "NOKEEP",
+                "ORDER",
+                "NOORDER",
+                "NOCACHE",
+                "CYCLE",
+                "NOCYCLE",
+                "NOMINVALUE",
+                "NOMAXVALUE",
+                "NOSCALE",
+                "NOSHARD",
+            ),
+            tuple(),
+        ),
+    }
+
+    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
+
     USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())
 
+    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
+
     INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 
     CLONE_KEYWORDS = {"CLONE", "COPY"}
     HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
 
-    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
+    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}
+
     OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
 
     TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
 
+    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}
+
     WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
     WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
     WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
@@ -994,6 +1061,8 @@ class Parser(metaclass=_Parser):
 
     UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
 
+    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
+
     STRICT_CAST = True
 
     PREFIXED_PIVOT_COLUMNS = False
@@ -1033,6 +1102,9 @@ class Parser(metaclass=_Parser):
     # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
     SUPPORTS_IMPLICIT_UNNEST = False
 
+    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
+    INTERVAL_SPANS = True
+
     __slots__ = (
         "error_level",
         "error_message_context",
@@ -1285,6 +1357,27 @@ class Parser(metaclass=_Parser):
             exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
         )
 
+    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
+        """
+        Attempts to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can
+        be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting
+        the parser state accordingly
+        """
+        index = self._index
+        error_level = self.error_level
+
+        self.error_level = ErrorLevel.IMMEDIATE
+        try:
+            this = parse_method()
+        except ParseError:
+            this = None
+        finally:
+            if not this or retreat:
+                self._retreat(index)
+            self.error_level = error_level
+
+        return this
+
     def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
         start = self._prev
         exists = self._parse_exists() if allow_exists else None
@@ -1377,13 +1470,22 @@ class Parser(metaclass=_Parser):
         if not kind:
             return self._parse_as_command(start)
 
+        if_exists = exists or self._parse_exists()
+        table = self._parse_table_parts(
+            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
+        )
+
+        if self._match(TokenType.L_PAREN, advance=False):
+            expressions = self._parse_wrapped_csv(self._parse_types)
+        else:
+            expressions = None
+
         return self.expression(
             exp.Drop,
             comments=start.comments,
-            exists=exists or self._parse_exists(),
-            this=self._parse_table(
-                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
-            ),
+            exists=if_exists,
+            this=table,
+            expressions=expressions,
             kind=kind,
             temporary=temporary,
             materialized=materialized,
@@ -1409,6 +1511,7 @@ class Parser(metaclass=_Parser):
             or self._match_pair(TokenType.OR, TokenType.REPLACE)
             or self._match_pair(TokenType.OR, TokenType.ALTER)
         )
+
         unique = self._match(TokenType.UNIQUE)
 
         if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
@@ -1489,7 +1592,11 @@ class Parser(metaclass=_Parser):
                 # exp.Properties.Location.POST_ALIAS
                 extend_props(self._parse_properties())
 
-            expression = self._parse_ddl_select()
+            if create_token.token_type == TokenType.SEQUENCE:
+                expression = self._parse_types()
+                extend_props(self._parse_properties())
+            else:
+                expression = self._parse_ddl_select()
 
             if create_token.token_type == TokenType.TABLE:
                 # exp.Properties.Location.POST_EXPRESSION
@@ -1539,6 +1646,40 @@ class Parser(metaclass=_Parser):
             clone=clone,
         )
 
+    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
+        seq = exp.SequenceProperties()
+
+        options = []
+        index = self._index
+
+        while self._curr:
+            if self._match_text_seq("INCREMENT"):
+                self._match_text_seq("BY")
+                self._match_text_seq("=")
+                seq.set("increment", self._parse_term())
+            elif self._match_text_seq("MINVALUE"):
+                seq.set("minvalue", self._parse_term())
+            elif self._match_text_seq("MAXVALUE"):
+                seq.set("maxvalue", self._parse_term())
+            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
+                self._match_text_seq("=")
+                seq.set("start", self._parse_term())
+            elif self._match_text_seq("CACHE"):
+                # T-SQL allows empty CACHE which is initialized dynamically
+                seq.set("cache", self._parse_number() or True)
+            elif self._match_text_seq("OWNED", "BY"):
+                # "OWNED BY NONE" is the default
+                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
+            else:
+                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
+                if opt:
+                    options.append(opt)
+                else:
+                    break
+
+        seq.set("options", options if options else None)
+        return None if self._index == index else seq
+
     def _parse_property_before(self) -> t.Optional[exp.Expression]:
         # only used for teradata currently
         self._match(TokenType.COMMA)
@@ -1564,6 +1705,9 @@ class Parser(metaclass=_Parser):
 
         return None
 
+    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
+        return self._parse_wrapped_csv(self._parse_property)
+
     def _parse_property(self) -> t.Optional[exp.Expression]:
         if self._match_texts(self.PROPERTY_PARSERS):
             return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
@@ -1582,12 +1726,12 @@ class Parser(metaclass=_Parser):
 
         if not self._match(TokenType.EQ):
             self._retreat(index)
-            return None
+            return self._parse_sequence_properties()
 
         return self.expression(
             exp.Property,
             this=key.to_dot() if isinstance(key, exp.Column) else key,
-            value=self._parse_column() or self._parse_var(any_token=True),
+            value=self._parse_bitwise() or self._parse_var(any_token=True),
         )
 
     def _parse_stored(self) -> exp.FileFormatProperty:
@@ -1619,7 +1763,6 @@ class Parser(metaclass=_Parser):
                 prop = self._parse_property_before()
             else:
                 prop = self._parse_property()
-
             if not prop:
                 break
             for p in ensure_list(prop):
@@ -1662,15 +1805,16 @@ class Parser(metaclass=_Parser):
 
         return prop
 
-    def _parse_with_property(
-        self,
-    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
+    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
         if self._match(TokenType.L_PAREN, advance=False):
-            return self._parse_wrapped_csv(self._parse_property)
+            return self._parse_wrapped_properties()
 
         if self._match_text_seq("JOURNAL"):
             return self._parse_withjournaltable()
 
+        if self._match_texts(self.VIEW_ATTRIBUTES):
+            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())
+
         if self._match_text_seq("DATA"):
             return self._parse_withdata(no=False)
         elif self._match_text_seq("NO", "DATA"):
@@ -1818,20 +1962,18 @@ class Parser(metaclass=_Parser):
             autotemp=autotemp,
         )
 
-    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
+    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
+        index = self._index
         no = self._match_text_seq("NO")
         concurrent = self._match_text_seq("CONCURRENT")
-        self._match_text_seq("ISOLATED", "LOADING")
-        for_all = self._match_text_seq("FOR", "ALL")
-        for_insert = self._match_text_seq("FOR", "INSERT")
-        for_none = self._match_text_seq("FOR", "NONE")
+
+        if not self._match_text_seq("ISOLATED", "LOADING"):
+            self._retreat(index)
+            return None
+
+        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
         return self.expression(
-            exp.IsolatedLoadingProperty,
-            no=no,
-            concurrent=concurrent,
-            for_all=for_all,
-            for_insert=for_insert,
-            for_none=for_none,
+            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
         )
 
     def _parse_locking(self) -> exp.LockingProperty:
@@ -2046,20 +2188,22 @@ class Parser(metaclass=_Parser):
 
     def _parse_describe(self) -> exp.Describe:
         kind = self._match_set(self.CREATABLES) and self._prev.text
-        extended = self._match_text_seq("EXTENDED")
+        style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper()
         this = self._parse_table(schema=True)
         properties = self._parse_properties()
         expressions = properties.expressions if properties else None
         return self.expression(
-            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
+            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
         )
 
     def _parse_insert(self) -> exp.Insert:
         comments = ensure_list(self._prev_comments)
+        hint = self._parse_hint()
         overwrite = self._match(TokenType.OVERWRITE)
         ignore = self._match(TokenType.IGNORE)
         local = self._match_text_seq("LOCAL")
         alternative = None
+        is_function = None
 
         if self._match_text_seq("DIRECTORY"):
             this: t.Optional[exp.Expression] = self.expression(
@@ -2075,13 +2219,17 @@ class Parser(metaclass=_Parser):
             self._match(TokenType.INTO)
             comments += ensure_list(self._prev_comments)
             self._match(TokenType.TABLE)
-            this = self._parse_table(schema=True)
+            is_function = self._match(TokenType.FUNCTION)
+
+            this = self._parse_table(schema=True) if not is_function else self._parse_function()
 
         returning = self._parse_returning()
 
         return self.expression(
             exp.Insert,
             comments=comments,
+            hint=hint,
+            is_function=is_function,
             this=this,
             by_name=self._match_text_seq("BY", "NAME"),
             exists=self._parse_exists(),
@@ -2112,31 +2260,29 @@ class Parser(metaclass=_Parser):
         if not conflict and not duplicate:
             return None
 
-        nothing = None
-        expressions = None
-        key = None
+        conflict_keys = None
         constraint = None
 
         if conflict:
             if self._match_text_seq("ON", "CONSTRAINT"):
                 constraint = self._parse_id_var()
-            else:
-                key = self._parse_csv(self._parse_value)
+            elif self._match(TokenType.L_PAREN):
+                conflict_keys = self._parse_csv(self._parse_id_var)
+                self._match_r_paren()
 
-        self._match_text_seq("DO")
-        if self._match_text_seq("NOTHING"):
-            nothing = True
-        else:
-            self._match(TokenType.UPDATE)
+        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
+        if self._prev.token_type == TokenType.UPDATE:
             self._match(TokenType.SET)
             expressions = self._parse_csv(self._parse_equality)
+        else:
+            expressions = None
 
         return self.expression(
             exp.OnConflict,
             duplicate=duplicate,
             expressions=expressions,
-            nothing=nothing,
-            key=key,
+            action=action,
+            conflict_keys=conflict_keys,
             constraint=constraint,
         )
 
@@ -2166,7 +2312,7 @@ class Parser(metaclass=_Parser):
             serde_properties = None
             if self._match(TokenType.SERDE_PROPERTIES):
                 serde_properties = self.expression(
-                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
+                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                 )
 
             return self.expression(
@@ -2433,8 +2579,19 @@ class Parser(metaclass=_Parser):
             self.raise_error("Expected CTE to have alias")
 
         self._match(TokenType.ALIAS)
+
+        if self._match_text_seq("NOT", "MATERIALIZED"):
+            materialized = False
+        elif self._match_text_seq("MATERIALIZED"):
+            materialized = True
+        else:
+            materialized = None
+
         return self.expression(
-            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
+            exp.CTE,
+            this=self._parse_wrapped(self._parse_statement),
+            alias=alias,
+            materialized=materialized,
         )
 
     def _parse_table_alias(
@@ -2472,7 +2629,9 @@ class Parser(metaclass=_Parser):
         )
 
     def _implicit_unnests_to_explicit(self, this: E) -> E:
-        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm
+        from sqlglot.optimizer.normalize_identifiers import (
+            normalize_identifiers as _norm,
+        )
 
         refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
         for i, join in enumerate(this.args.get("joins") or []):
@@ -2502,7 +2661,7 @@ class Parser(metaclass=_Parser):
         self, this: t.Optional[exp.Expression]
     ) -> t.Optional[exp.Expression]:
         if isinstance(this, (exp.Query, exp.Table)):
-            for join in iter(self._parse_join, None):
+            for join in self._parse_joins():
                 this.append("joins", join)
             for lateral in iter(self._parse_lateral, None):
                 this.append("laterals", lateral)
@@ -2535,7 +2694,12 @@ class Parser(metaclass=_Parser):
     def _parse_hint(self) -> t.Optional[exp.Hint]:
         if self._match(TokenType.HINT):
             hints = []
-            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
+            for hint in iter(
+                lambda: self._parse_csv(
+                    lambda: self._parse_function() or self._parse_var(upper=True)
+                ),
+                [],
+            ):
                 hints.extend(hint)
 
             if not self._match_pair(TokenType.STAR, TokenType.SLASH):
@@ -2743,29 +2907,35 @@ class Parser(metaclass=_Parser):
         if hint:
             kwargs["hint"] = hint
 
+        if self._match(TokenType.MATCH_CONDITION):
+            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)
+
         if self._match(TokenType.ON):
             kwargs["on"] = self._parse_conjunction()
         elif self._match(TokenType.USING):
             kwargs["using"] = self._parse_wrapped_id_vars()
-        elif not (kind and kind.token_type == TokenType.CROSS):
+        elif not isinstance(kwargs["this"], exp.Unnest) and not (
+            kind and kind.token_type == TokenType.CROSS
+        ):
             index = self._index
-            join = self._parse_join()
+            joins: t.Optional[list] = list(self._parse_joins())
 
-            if join and self._match(TokenType.ON):
+            if joins and self._match(TokenType.ON):
                 kwargs["on"] = self._parse_conjunction()
-            elif join and self._match(TokenType.USING):
+            elif joins and self._match(TokenType.USING):
                 kwargs["using"] = self._parse_wrapped_id_vars()
             else:
-                join = None
+                joins = None
                 self._retreat(index)
 
-            kwargs["this"].set("joins", [join] if join else None)
+            kwargs["this"].set("joins", joins if joins else None)
 
         comments = [c for token in (method, side, kind) if token for c in token.comments]
         return self.expression(exp.Join, comments=comments, **kwargs)
 
     def _parse_opclass(self) -> t.Optional[exp.Expression]:
         this = self._parse_conjunction()
+
         if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
             return this
 
@@ -2774,6 +2944,35 @@ class Parser(metaclass=_Parser):
 
         return this
 
+    def _parse_index_params(self) -> exp.IndexParameters:
+        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
+
+        if self._match(TokenType.L_PAREN, advance=False):
+            columns = self._parse_wrapped_csv(self._parse_with_operator)
+        else:
+            columns = None
+
+        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
+        partition_by = self._parse_partition_by()
+        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
+        tablespace = (
+            self._parse_var(any_token=True)
+            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
+            else None
+        )
+        where = self._parse_where()
+
+        return self.expression(
+            exp.IndexParameters,
+            using=using,
+            columns=columns,
+            include=include,
+            partition_by=partition_by,
+            where=where,
+            with_storage=with_storage,
+            tablespace=tablespace,
+        )
+
     def _parse_index(
         self,
         index: t.Optional[exp.Expression] = None,
@@ -2797,27 +2996,16 @@ class Parser(metaclass=_Parser):
             index = self._parse_id_var()
             table = None
 
-        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
-
-        if self._match(TokenType.L_PAREN, advance=False):
-            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
-        else:
-            columns = None
-
-        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
+        params = self._parse_index_params()
 
         return self.expression(
             exp.Index,
             this=index,
             table=table,
-            using=using,
-            columns=columns,
             unique=unique,
             primary=primary,
             amp=amp,
-            include=include,
-            partition_by=self._parse_partition_by(),
-            where=self._parse_where(),
+            params=params,
         )
 
     def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
@@ -2977,7 +3165,7 @@ class Parser(metaclass=_Parser):
             this = table_sample
 
         if joins:
-            for join in iter(self._parse_join, None):
+            for join in self._parse_joins():
                 this.append("joins", join)
 
         if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
@@ -3126,8 +3314,8 @@ class Parser(metaclass=_Parser):
     def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
         return list(iter(self._parse_pivot, None)) or None
 
-    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
-        return list(iter(self._parse_join, None)) or None
+    def _parse_joins(self) -> t.Iterator[exp.Join]:
+        return iter(self._parse_join, None)
 
     # https://duckdb.org/docs/sql/statements/pivot
     def _parse_simplified_pivot(self) -> exp.Pivot:
@@ -3328,6 +3516,7 @@ class Parser(metaclass=_Parser):
             return None
 
         self._match(TokenType.CONNECT_BY)
+        nocycle = self._match_text_seq("NOCYCLE")
         self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
             exp.Prior, this=self._parse_bitwise()
         )
@@ -3337,7 +3526,7 @@ class Parser(metaclass=_Parser):
         if not start and self._match(TokenType.START_WITH):
             start = self._parse_conjunction()
 
-        return self.expression(exp.Connect, start=start, connect=connect)
+        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
 
     def _parse_name_as_expression(self) -> exp.Alias:
         return self.expression(
@@ -3417,9 +3606,12 @@ class Parser(metaclass=_Parser):
         )
 
     def _parse_limit(
-        self, this: t.Optional[exp.Expression] = None, top: bool = False
+        self,
+        this: t.Optional[exp.Expression] = None,
+        top: bool = False,
+        skip_limit_token: bool = False,
     ) -> t.Optional[exp.Expression]:
-        if self._match(TokenType.TOP if top else TokenType.LIMIT):
+        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
             comments = self._prev_comments
             if top:
                 limit_paren = self._match(TokenType.L_PAREN)
@@ -3681,6 +3873,11 @@ class Parser(metaclass=_Parser):
                 this = exp.Literal.string(parts[0])
                 unit = self.expression(exp.Var, this=parts[1].upper())
 
+        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
+            unit = self.expression(
+                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
+            )
+
         return self.expression(exp.Interval, this=this, unit=unit)
 
     def _parse_bitwise(self) -> t.Optional[exp.Expression]:
@@ -3783,6 +3980,9 @@ class Parser(metaclass=_Parser):
         if not this:
             return None
 
+        if isinstance(this, exp.Column) and not this.table:
+            this = exp.var(this.name.upper())
+
         return self.expression(
             exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
         )
@@ -3900,19 +4100,14 @@ class Parser(metaclass=_Parser):
             elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                 maybe_func = False
         elif type_token == TokenType.INTERVAL:
-            unit = self._parse_var()
-
-            if self._match_text_seq("TO"):
-                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
-            else:
-                span = None
+            unit = self._parse_var(upper=True)
+            if unit:
+                if self._match_text_seq("TO"):
+                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))
 
-            if span or not unit:
-                this = self.expression(
-                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
-                )
-            else:
                 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
+            else:
+                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
 
         if maybe_func and check_func:
             index2 = self._index
@@ -3996,11 +4191,20 @@ class Parser(metaclass=_Parser):
             else:
                 field = self._parse_field(anonymous_func=True, any_token=True)
 
-            if isinstance(field, exp.Func):
+            if isinstance(field, exp.Func) and this:
                 # bigquery allows function calls like x.y.count(...)
                 # SAFE.SUBSTR(...)
                 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
-                this = self._replace_columns_with_dots(this)
+                this = exp.replace_tree(
+                    this,
+                    lambda n: (
+                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
+                        if n.table
+                        else n.this
+                    )
+                    if isinstance(n, exp.Column)
+                    else n,
+                )
 
             if op:
                 this = op(self, this, field)
@@ -4050,10 +4254,14 @@ class Parser(metaclass=_Parser):
                 this = self._parse_set_operations(
                     self._parse_subquery(this=this, parse_alias=False)
                 )
+            elif isinstance(this, exp.Subquery):
+                this = self._parse_subquery(
+                    this=self._parse_set_operations(this), parse_alias=False
+                )
             elif len(expressions) > 1:
                 this = self.expression(exp.Tuple, expressions=expressions)
             else:
-                this = self.expression(exp.Paren, this=self._parse_set_operations(this))
+                this = self.expression(exp.Paren, this=this)
 
             if this:
                 this.add_comments(comments)
@@ -4118,7 +4326,7 @@ class Parser(metaclass=_Parser):
         parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
         if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
             self._advance()
-            return parser(self)
+            return self._parse_window(parser(self))
 
         if not self._next or self._next.token_type != TokenType.L_PAREN:
             if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
@@ -4186,7 +4394,7 @@ class Parser(metaclass=_Parser):
 
                 if not isinstance(e, exp.PropertyEQ):
                     e = self.expression(
-                        exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression
+                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                     )
 
                 if isinstance(e.this, exp.Column):
@@ -4267,19 +4475,15 @@ class Parser(metaclass=_Parser):
     def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
         index = self._index
 
-        if not self.errors:
-            try:
-                if self._parse_select(nested=True):
-                    return this
-            except ParseError:
-                pass
-            finally:
-                self.errors.clear()
-                self._retreat(index)
-
         if not self._match(TokenType.L_PAREN):
             return this
 
+        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
+        # expr can be of both types
+        if self._match_set(self.SELECT_START_TOKENS):
+            self._retreat(index)
+            return this
+
         args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
 
         self._match_r_paren()
@@ -4300,7 +4504,7 @@ class Parser(metaclass=_Parser):
 
         constraints: t.List[exp.Expression] = []
 
-        if not kind and self._match(TokenType.ALIAS):
+        if (not kind and self._match(TokenType.ALIAS)) or self._match_text_seq("ALIAS"):
             constraints.append(
                 self.expression(
                     exp.ComputedColumnConstraint,
@@ -4417,9 +4621,7 @@ class Parser(metaclass=_Parser):
         self._match_text_seq("LENGTH")
         return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
 
-    def _parse_not_constraint(
-        self,
-    ) -> t.Optional[exp.Expression]:
+    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
         if self._match_text_seq("NULL"):
             return self.expression(exp.NotNullColumnConstraint)
         if self._match_text_seq("CASESPECIFIC"):
@@ -4447,16 +4649,21 @@ class Parser(metaclass=_Parser):
         if not self._match(TokenType.CONSTRAINT):
             return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
 
-        this = self._parse_id_var()
-        expressions = []
+        return self.expression(
+            exp.Constraint,
+            this=self._parse_id_var(),
+            expressions=self._parse_unnamed_constraints(),
+        )
 
+    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
+        constraints = []
         while True:
             constraint = self._parse_unnamed_constraint() or self._parse_function()
             if not constraint:
                 break
-            expressions.append(constraint)
+            constraints.append(constraint)
 
-        return self.expression(exp.Constraint, this=this, expressions=expressions)
+        return constraints
 
     def _parse_unnamed_constraint(
         self, constraints: t.Optional[t.Collection[str]] = None
@@ -4478,6 +4685,7 @@ class Parser(metaclass=_Parser):
             exp.UniqueColumnConstraint,
             this=self._parse_schema(self._parse_id_var(any_token=False)),
             index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
+            on_conflict=self._parse_on_conflict(),
         )
 
     def _parse_key_constraint_options(self) -> t.List[str]:
@@ -4592,7 +4800,7 @@ class Parser(metaclass=_Parser):
     def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
         return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))
 
-    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
         if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
             return this
 
@@ -4601,9 +4809,9 @@ class Parser(metaclass=_Parser):
             lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
         )
 
-        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
+        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
             self.raise_error("Expected ]")
-        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
+        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
             self.raise_error("Expected }")
 
         # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
@@ -4645,8 +4853,8 @@ class Parser(metaclass=_Parser):
             else:
                 self.raise_error("Expected END after CASE", self._prev)
 
-        return self._parse_window(
-            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
+        return self.expression(
+            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
         )
 
     def _parse_if(self) -> t.Optional[exp.Expression]:
@@ -4672,7 +4880,7 @@ class Parser(metaclass=_Parser):
             self._match(TokenType.END)
             this = self.expression(exp.If, this=condition, true=true, false=false)
 
-        return self._parse_window(this)
+        return this
 
     def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
         if not self._match_text_seq("VALUE", "FOR"):
@@ -4739,7 +4947,12 @@ class Parser(metaclass=_Parser):
                 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
 
         return self.expression(
-            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
+            exp.Cast if strict else exp.TryCast,
+            this=this,
+            to=to,
+            format=fmt,
+            safe=safe,
+            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
         )
 
     def _parse_string_agg(self) -> exp.Expression:
@@ -5087,6 +5300,9 @@ class Parser(metaclass=_Parser):
     def _parse_window(
         self, this: t.Optional[exp.Expression], alias: bool = False
     ) -> t.Optional[exp.Expression]:
+        func = this
+        comments = func.comments if isinstance(func, exp.Expression) else None
+
         if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
             self._match(TokenType.WHERE)
             this = self.expression(
@@ -5132,9 +5348,16 @@ class Parser(metaclass=_Parser):
         else:
             over = self._prev.text.upper()
 
+        if comments:
+            func.comments = None  # type: ignore
+
         if not self._match(TokenType.L_PAREN):
             return self.expression(
-                exp.Window, this=this, alias=self._parse_id_var(False), over=over
+                exp.Window,
+                comments=comments,
+                this=this,
+                alias=self._parse_id_var(False),
+                over=over,
             )
 
         window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
@@ -5167,6 +5390,7 @@ class Parser(metaclass=_Parser):
 
         window = self.expression(
             exp.Window,
+            comments=comments,
             this=this,
             partition_by=partition,
             order=order,
@@ -5218,7 +5442,7 @@ class Parser(metaclass=_Parser):
             self._match_r_paren(aliases)
             return aliases
 
-        alias = self._parse_id_var(any_token) or (
+        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
             self.STRING_ALIASES and self._parse_string_as_identifier()
         )
 
@@ -5512,10 +5736,11 @@ class Parser(metaclass=_Parser):
             return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
 
         self._match_text_seq("SET", "DATA")
+        self._match_text_seq("TYPE")
         return self.expression(
             exp.AlterColumn,
             this=column,
-            dtype=self._match_text_seq("TYPE") and self._parse_types(),
+            dtype=self._parse_types(),
             collate=self._match(TokenType.COLLATE) and self._parse_term(),
             using=self._match(TokenType.USING) and self._parse_conjunction(),
         )
@@ -5919,26 +6144,6 @@ class Parser(metaclass=_Parser):
 
         return True
 
-    @t.overload
-    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...
-
-    @t.overload
-    def _replace_columns_with_dots(
-        self, this: t.Optional[exp.Expression]
-    ) -> t.Optional[exp.Expression]: ...
-
-    def _replace_columns_with_dots(self, this):
-        if isinstance(this, exp.Dot):
-            exp.replace_children(this, self._replace_columns_with_dots)
-        elif isinstance(this, exp.Column):
-            exp.replace_children(this, self._replace_columns_with_dots)
-            table = this.args.get("table")
-            this = (
-                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
-            )
-
-        return this
-
     def _replace_lambda(
         self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
     ) -> t.Optional[exp.Expression]:
@@ -6011,3 +6216,13 @@ class Parser(metaclass=_Parser):
             option=option,
             partition=partition,
         )
+
+    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
+        this = self._parse_ordered(self._parse_opclass)
+
+        if not self._match(TokenType.WITH):
+            return this
+
+        op = self._parse_var(any_token=True)
+
+        return self.expression(exp.WithOperator, this=this, op=op)
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-08 08:11:50 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-08 08:11:50 +0000
commit	8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2 (patch)
tree	2e29f131dff77b31e84c957266de8f18655b6f88 /sqlglot/parser.py
parent	Adding upstream version 22.2.0. (diff)
download	sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.tar.xz sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.zip