diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-08 08:11:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-08 08:11:50 +0000 |
commit | 8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2 (patch) | |
tree | 2e29f131dff77b31e84c957266de8f18655b6f88 /sqlglot/parser.py | |
parent | Adding upstream version 22.2.0. (diff) | |
download | sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.tar.xz sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.zip |
Adding upstream version 23.7.0.upstream/23.7.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 493 |
1 files changed, 354 insertions, 139 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 49dac2e..91d8d13 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -15,6 +15,8 @@ if t.TYPE_CHECKING: from sqlglot._typing import E, Lit from sqlglot.dialects.dialect import Dialect, DialectType + T = t.TypeVar("T") + logger = logging.getLogger("sqlglot") OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] @@ -119,6 +121,9 @@ class Parser(metaclass=_Parser): "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), "LIKE": build_like, "LOG": build_logarithm, + "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), + "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), + "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), "TIME_TO_TIME_STR": lambda args: exp.Cast( this=seq_get(args, 0), to=exp.DataType(this=exp.DataType.Type.TEXT), @@ -144,6 +149,7 @@ class Parser(metaclass=_Parser): STRUCT_TYPE_TOKENS = { TokenType.NESTED, + TokenType.OBJECT, TokenType.STRUCT, } @@ -258,6 +264,7 @@ class Parser(metaclass=_Parser): TokenType.IPV6, TokenType.UNKNOWN, TokenType.NULL, + TokenType.NAME, *ENUM_TYPE_TOKENS, *NESTED_TYPE_TOKENS, *AGGREGATE_TYPE_TOKENS, @@ -291,6 +298,7 @@ class Parser(metaclass=_Parser): TokenType.VIEW, TokenType.MODEL, TokenType.DICTIONARY, + TokenType.SEQUENCE, TokenType.STORAGE_INTEGRATION, } @@ -310,6 +318,7 @@ class Parser(metaclass=_Parser): TokenType.ANTI, TokenType.APPLY, TokenType.ASC, + TokenType.ASOF, TokenType.AUTO_INCREMENT, TokenType.BEGIN, TokenType.BPCHAR, @@ -398,6 +407,8 @@ class Parser(metaclass=_Parser): TokenType.WINDOW, } + ALIAS_TOKENS = ID_VAR_TOKENS + COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} @@ -433,6 +444,7 @@ class Parser(metaclass=_Parser): TokenType.VAR, TokenType.LEFT, TokenType.RIGHT, + TokenType.SEQUENCE, TokenType.DATE, TokenType.DATETIME, TokenType.TABLE, @@ -505,8 +517,9 @@ class Parser(metaclass=_Parser): } JOIN_METHODS = { - TokenType.NATURAL, TokenType.ASOF, + TokenType.NATURAL, + TokenType.POSITIONAL, } JOIN_SIDES = { @@ -611,8 +624,8 @@ class Parser(metaclass=_Parser): TokenType.ALTER: lambda self: self._parse_alter(), TokenType.BEGIN: lambda self: self._parse_transaction(), TokenType.CACHE: lambda self: self._parse_cache(), - TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), TokenType.COMMENT: lambda self: self._parse_comment(), + TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), TokenType.CREATE: lambda self: self._parse_create(), TokenType.DELETE: lambda self: self._parse_delete(), TokenType.DESC: lambda self: self._parse_describe(), @@ -627,9 +640,9 @@ class Parser(metaclass=_Parser): TokenType.REFRESH: lambda self: self._parse_refresh(), TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), TokenType.SET: lambda self: self._parse_set(), + TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), TokenType.UNCACHE: lambda self: self._parse_uncache(), TokenType.UPDATE: lambda self: self._parse_update(), - TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), TokenType.USE: lambda self: self.expression( exp.Use, kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), @@ -714,6 +727,9 @@ class Parser(metaclass=_Parser): "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), "AUTO": lambda self: self._parse_auto_property(), "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), + "BACKUP": lambda self: self.expression( + exp.BackupProperty, this=self._parse_var(any_token=True) + ), "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), @@ -739,7 +755,9 @@ class Parser(metaclass=_Parser): "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), "FREESPACE": lambda self: self._parse_freespace(), + "GLOBAL": lambda self: self.expression(exp.GlobalProperty), "HEAP": lambda self: self.expression(exp.HeapProperty), + "ICEBERG": lambda self: self.expression(exp.IcebergProperty), "IMMUTABLE": lambda self: self.expression( exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") ), @@ -782,6 +800,7 @@ class Parser(metaclass=_Parser): "SETTINGS": lambda self: self.expression( exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) ), + "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), "SORTKEY": lambda self: self._parse_sortkey(), "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), "STABLE": lambda self: self.expression( @@ -789,7 +808,7 @@ class Parser(metaclass=_Parser): ), "STORED": lambda self: self._parse_stored(), "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), - "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), + "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), "TEMP": lambda self: self.expression(exp.TemporaryProperty), "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), "TO": lambda self: self._parse_to_table(), @@ -799,6 +818,7 @@ class Parser(metaclass=_Parser): ), "TTL": lambda self: self._parse_ttl(), "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), + "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), "VOLATILE": lambda self: self._parse_volatile_property(), "WITH": lambda self: self._parse_with_property(), } @@ -832,6 +852,9 @@ class Parser(metaclass=_Parser): exp.DefaultColumnConstraint, this=self._parse_bitwise() ), "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), + "EXCLUDE": lambda self: self.expression( + exp.ExcludeColumnConstraint, this=self._parse_index_params() + ), "FOREIGN KEY": lambda self: self._parse_foreign_key(), "FORMAT": lambda self: self.expression( exp.DateFormatColumnConstraint, this=self._parse_var_or_string() @@ -858,7 +881,7 @@ class Parser(metaclass=_Parser): "UNIQUE": lambda self: self._parse_unique(), "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), "WITH": lambda self: self.expression( - exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) + exp.Properties, expressions=self._parse_wrapped_properties() ), } @@ -871,7 +894,15 @@ class Parser(metaclass=_Parser): "RENAME": lambda self: self._parse_alter_table_rename(), } - SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} + SCHEMA_UNNAMED_CONSTRAINTS = { + "CHECK", + "EXCLUDE", + "FOREIGN KEY", + "LIKE", + "PERIOD", + "PRIMARY KEY", + "UNIQUE", + } NO_PAREN_FUNCTION_PARSERS = { "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), @@ -966,18 +997,54 @@ class Parser(metaclass=_Parser): "READ": ("WRITE", "ONLY"), } + CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( + ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() + ) + CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") + + CREATE_SEQUENCE: OPTIONS_TYPE = { + "SCALE": ("EXTEND", "NOEXTEND"), + "SHARD": ("EXTEND", "NOEXTEND"), + "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), + **dict.fromkeys( + ( + "SESSION", + "GLOBAL", + "KEEP", + "NOKEEP", + "ORDER", + "NOORDER", + "NOCACHE", + "CYCLE", + "NOCYCLE", + "NOMINVALUE", + "NOMAXVALUE", + "NOSCALE", + "NOSHARD", + ), + tuple(), + ), + } + + ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} + USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) + CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) + INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} CLONE_KEYWORDS = {"CLONE", "COPY"} HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} - OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} + OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} + OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} + VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} + WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} @@ -994,6 +1061,8 @@ class Parser(metaclass=_Parser): UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS + SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} + STRICT_CAST = True PREFIXED_PIVOT_COLUMNS = False @@ -1033,6 +1102,9 @@ class Parser(metaclass=_Parser): # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) SUPPORTS_IMPLICIT_UNNEST = False + # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS + INTERVAL_SPANS = True + __slots__ = ( "error_level", "error_message_context", @@ -1285,6 +1357,27 @@ class Parser(metaclass=_Parser): exp.Command, this=self._prev.text.upper(), expression=self._parse_string() ) + def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: + """ + Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can + be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting + the parser state accordingly + """ + index = self._index + error_level = self.error_level + + self.error_level = ErrorLevel.IMMEDIATE + try: + this = parse_method() + except ParseError: + this = None + finally: + if not this or retreat: + self._retreat(index) + self.error_level = error_level + + return this + def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: start = self._prev exists = self._parse_exists() if allow_exists else None @@ -1377,13 +1470,22 @@ class Parser(metaclass=_Parser): if not kind: return self._parse_as_command(start) + if_exists = exists or self._parse_exists() + table = self._parse_table_parts( + schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA + ) + + if self._match(TokenType.L_PAREN, advance=False): + expressions = self._parse_wrapped_csv(self._parse_types) + else: + expressions = None + return self.expression( exp.Drop, comments=start.comments, - exists=exists or self._parse_exists(), - this=self._parse_table( - schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA - ), + exists=if_exists, + this=table, + expressions=expressions, kind=kind, temporary=temporary, materialized=materialized, @@ -1409,6 +1511,7 @@ class Parser(metaclass=_Parser): or self._match_pair(TokenType.OR, TokenType.REPLACE) or self._match_pair(TokenType.OR, TokenType.ALTER) ) + unique = self._match(TokenType.UNIQUE) if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): @@ -1489,7 +1592,11 @@ class Parser(metaclass=_Parser): # exp.Properties.Location.POST_ALIAS extend_props(self._parse_properties()) - expression = self._parse_ddl_select() + if create_token.token_type == TokenType.SEQUENCE: + expression = self._parse_types() + extend_props(self._parse_properties()) + else: + expression = self._parse_ddl_select() if create_token.token_type == TokenType.TABLE: # exp.Properties.Location.POST_EXPRESSION @@ -1539,6 +1646,40 @@ class Parser(metaclass=_Parser): clone=clone, ) + def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: + seq = exp.SequenceProperties() + + options = [] + index = self._index + + while self._curr: + if self._match_text_seq("INCREMENT"): + self._match_text_seq("BY") + self._match_text_seq("=") + seq.set("increment", self._parse_term()) + elif self._match_text_seq("MINVALUE"): + seq.set("minvalue", self._parse_term()) + elif self._match_text_seq("MAXVALUE"): + seq.set("maxvalue", self._parse_term()) + elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): + self._match_text_seq("=") + seq.set("start", self._parse_term()) + elif self._match_text_seq("CACHE"): + # T-SQL allows empty CACHE which is initialized dynamically + seq.set("cache", self._parse_number() or True) + elif self._match_text_seq("OWNED", "BY"): + # "OWNED BY NONE" is the default + seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) + else: + opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) + if opt: + options.append(opt) + else: + break + + seq.set("options", options if options else None) + return None if self._index == index else seq + def _parse_property_before(self) -> t.Optional[exp.Expression]: # only used for teradata currently self._match(TokenType.COMMA) @@ -1564,6 +1705,9 @@ class Parser(metaclass=_Parser): return None + def _parse_wrapped_properties(self) -> t.List[exp.Expression]: + return self._parse_wrapped_csv(self._parse_property) + def _parse_property(self) -> t.Optional[exp.Expression]: if self._match_texts(self.PROPERTY_PARSERS): return self.PROPERTY_PARSERS[self._prev.text.upper()](self) @@ -1582,12 +1726,12 @@ class Parser(metaclass=_Parser): if not self._match(TokenType.EQ): self._retreat(index) - return None + return self._parse_sequence_properties() return self.expression( exp.Property, this=key.to_dot() if isinstance(key, exp.Column) else key, - value=self._parse_column() or self._parse_var(any_token=True), + value=self._parse_bitwise() or self._parse_var(any_token=True), ) def _parse_stored(self) -> exp.FileFormatProperty: @@ -1619,7 +1763,6 @@ class Parser(metaclass=_Parser): prop = self._parse_property_before() else: prop = self._parse_property() - if not prop: break for p in ensure_list(prop): @@ -1662,15 +1805,16 @@ class Parser(metaclass=_Parser): return prop - def _parse_with_property( - self, - ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: + def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: if self._match(TokenType.L_PAREN, advance=False): - return self._parse_wrapped_csv(self._parse_property) + return self._parse_wrapped_properties() if self._match_text_seq("JOURNAL"): return self._parse_withjournaltable() + if self._match_texts(self.VIEW_ATTRIBUTES): + return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) + if self._match_text_seq("DATA"): return self._parse_withdata(no=False) elif self._match_text_seq("NO", "DATA"): @@ -1818,20 +1962,18 @@ class Parser(metaclass=_Parser): autotemp=autotemp, ) - def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: + def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: + index = self._index no = self._match_text_seq("NO") concurrent = self._match_text_seq("CONCURRENT") - self._match_text_seq("ISOLATED", "LOADING") - for_all = self._match_text_seq("FOR", "ALL") - for_insert = self._match_text_seq("FOR", "INSERT") - for_none = self._match_text_seq("FOR", "NONE") + + if not self._match_text_seq("ISOLATED", "LOADING"): + self._retreat(index) + return None + + target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) return self.expression( - exp.IsolatedLoadingProperty, - no=no, - concurrent=concurrent, - for_all=for_all, - for_insert=for_insert, - for_none=for_none, + exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target ) def _parse_locking(self) -> exp.LockingProperty: @@ -2046,20 +2188,22 @@ class Parser(metaclass=_Parser): def _parse_describe(self) -> exp.Describe: kind = self._match_set(self.CREATABLES) and self._prev.text - extended = self._match_text_seq("EXTENDED") + style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper() this = self._parse_table(schema=True) properties = self._parse_properties() expressions = properties.expressions if properties else None return self.expression( - exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions + exp.Describe, this=this, style=style, kind=kind, expressions=expressions ) def _parse_insert(self) -> exp.Insert: comments = ensure_list(self._prev_comments) + hint = self._parse_hint() overwrite = self._match(TokenType.OVERWRITE) ignore = self._match(TokenType.IGNORE) local = self._match_text_seq("LOCAL") alternative = None + is_function = None if self._match_text_seq("DIRECTORY"): this: t.Optional[exp.Expression] = self.expression( @@ -2075,13 +2219,17 @@ class Parser(metaclass=_Parser): self._match(TokenType.INTO) comments += ensure_list(self._prev_comments) self._match(TokenType.TABLE) - this = self._parse_table(schema=True) + is_function = self._match(TokenType.FUNCTION) + + this = self._parse_table(schema=True) if not is_function else self._parse_function() returning = self._parse_returning() return self.expression( exp.Insert, comments=comments, + hint=hint, + is_function=is_function, this=this, by_name=self._match_text_seq("BY", "NAME"), exists=self._parse_exists(), @@ -2112,31 +2260,29 @@ class Parser(metaclass=_Parser): if not conflict and not duplicate: return None - nothing = None - expressions = None - key = None + conflict_keys = None constraint = None if conflict: if self._match_text_seq("ON", "CONSTRAINT"): constraint = self._parse_id_var() - else: - key = self._parse_csv(self._parse_value) + elif self._match(TokenType.L_PAREN): + conflict_keys = self._parse_csv(self._parse_id_var) + self._match_r_paren() - self._match_text_seq("DO") - if self._match_text_seq("NOTHING"): - nothing = True - else: - self._match(TokenType.UPDATE) + action = self._parse_var_from_options(self.CONFLICT_ACTIONS) + if self._prev.token_type == TokenType.UPDATE: self._match(TokenType.SET) expressions = self._parse_csv(self._parse_equality) + else: + expressions = None return self.expression( exp.OnConflict, duplicate=duplicate, expressions=expressions, - nothing=nothing, - key=key, + action=action, + conflict_keys=conflict_keys, constraint=constraint, ) @@ -2166,7 +2312,7 @@ class Parser(metaclass=_Parser): serde_properties = None if self._match(TokenType.SERDE_PROPERTIES): serde_properties = self.expression( - exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) + exp.SerdeProperties, expressions=self._parse_wrapped_properties() ) return self.expression( @@ -2433,8 +2579,19 @@ class Parser(metaclass=_Parser): self.raise_error("Expected CTE to have alias") self._match(TokenType.ALIAS) + + if self._match_text_seq("NOT", "MATERIALIZED"): + materialized = False + elif self._match_text_seq("MATERIALIZED"): + materialized = True + else: + materialized = None + return self.expression( - exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias + exp.CTE, + this=self._parse_wrapped(self._parse_statement), + alias=alias, + materialized=materialized, ) def _parse_table_alias( @@ -2472,7 +2629,9 @@ class Parser(metaclass=_Parser): ) def _implicit_unnests_to_explicit(self, this: E) -> E: - from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm + from sqlglot.optimizer.normalize_identifiers import ( + normalize_identifiers as _norm, + ) refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} for i, join in enumerate(this.args.get("joins") or []): @@ -2502,7 +2661,7 @@ class Parser(metaclass=_Parser): self, this: t.Optional[exp.Expression] ) -> t.Optional[exp.Expression]: if isinstance(this, (exp.Query, exp.Table)): - for join in iter(self._parse_join, None): + for join in self._parse_joins(): this.append("joins", join) for lateral in iter(self._parse_lateral, None): this.append("laterals", lateral) @@ -2535,7 +2694,12 @@ class Parser(metaclass=_Parser): def _parse_hint(self) -> t.Optional[exp.Hint]: if self._match(TokenType.HINT): hints = [] - for hint in iter(lambda: self._parse_csv(self._parse_function), []): + for hint in iter( + lambda: self._parse_csv( + lambda: self._parse_function() or self._parse_var(upper=True) + ), + [], + ): hints.extend(hint) if not self._match_pair(TokenType.STAR, TokenType.SLASH): @@ -2743,29 +2907,35 @@ class Parser(metaclass=_Parser): if hint: kwargs["hint"] = hint + if self._match(TokenType.MATCH_CONDITION): + kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) + if self._match(TokenType.ON): kwargs["on"] = self._parse_conjunction() elif self._match(TokenType.USING): kwargs["using"] = self._parse_wrapped_id_vars() - elif not (kind and kind.token_type == TokenType.CROSS): + elif not isinstance(kwargs["this"], exp.Unnest) and not ( + kind and kind.token_type == TokenType.CROSS + ): index = self._index - join = self._parse_join() + joins: t.Optional[list] = list(self._parse_joins()) - if join and self._match(TokenType.ON): + if joins and self._match(TokenType.ON): kwargs["on"] = self._parse_conjunction() - elif join and self._match(TokenType.USING): + elif joins and self._match(TokenType.USING): kwargs["using"] = self._parse_wrapped_id_vars() else: - join = None + joins = None self._retreat(index) - kwargs["this"].set("joins", [join] if join else None) + kwargs["this"].set("joins", joins if joins else None) comments = [c for token in (method, side, kind) if token for c in token.comments] return self.expression(exp.Join, comments=comments, **kwargs) def _parse_opclass(self) -> t.Optional[exp.Expression]: this = self._parse_conjunction() + if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): return this @@ -2774,6 +2944,35 @@ class Parser(metaclass=_Parser): return this + def _parse_index_params(self) -> exp.IndexParameters: + using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None + + if self._match(TokenType.L_PAREN, advance=False): + columns = self._parse_wrapped_csv(self._parse_with_operator) + else: + columns = None + + include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None + partition_by = self._parse_partition_by() + with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() + tablespace = ( + self._parse_var(any_token=True) + if self._match_text_seq("USING", "INDEX", "TABLESPACE") + else None + ) + where = self._parse_where() + + return self.expression( + exp.IndexParameters, + using=using, + columns=columns, + include=include, + partition_by=partition_by, + where=where, + with_storage=with_storage, + tablespace=tablespace, + ) + def _parse_index( self, index: t.Optional[exp.Expression] = None, @@ -2797,27 +2996,16 @@ class Parser(metaclass=_Parser): index = self._parse_id_var() table = None - using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None - - if self._match(TokenType.L_PAREN, advance=False): - columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) - else: - columns = None - - include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None + params = self._parse_index_params() return self.expression( exp.Index, this=index, table=table, - using=using, - columns=columns, unique=unique, primary=primary, amp=amp, - include=include, - partition_by=self._parse_partition_by(), - where=self._parse_where(), + params=params, ) def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: @@ -2977,7 +3165,7 @@ class Parser(metaclass=_Parser): this = table_sample if joins: - for join in iter(self._parse_join, None): + for join in self._parse_joins(): this.append("joins", join) if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): @@ -3126,8 +3314,8 @@ class Parser(metaclass=_Parser): def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: return list(iter(self._parse_pivot, None)) or None - def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: - return list(iter(self._parse_join, None)) or None + def _parse_joins(self) -> t.Iterator[exp.Join]: + return iter(self._parse_join, None) # https://duckdb.org/docs/sql/statements/pivot def _parse_simplified_pivot(self) -> exp.Pivot: @@ -3328,6 +3516,7 @@ class Parser(metaclass=_Parser): return None self._match(TokenType.CONNECT_BY) + nocycle = self._match_text_seq("NOCYCLE") self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( exp.Prior, this=self._parse_bitwise() ) @@ -3337,7 +3526,7 @@ class Parser(metaclass=_Parser): if not start and self._match(TokenType.START_WITH): start = self._parse_conjunction() - return self.expression(exp.Connect, start=start, connect=connect) + return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) def _parse_name_as_expression(self) -> exp.Alias: return self.expression( @@ -3417,9 +3606,12 @@ class Parser(metaclass=_Parser): ) def _parse_limit( - self, this: t.Optional[exp.Expression] = None, top: bool = False + self, + this: t.Optional[exp.Expression] = None, + top: bool = False, + skip_limit_token: bool = False, ) -> t.Optional[exp.Expression]: - if self._match(TokenType.TOP if top else TokenType.LIMIT): + if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): comments = self._prev_comments if top: limit_paren = self._match(TokenType.L_PAREN) @@ -3681,6 +3873,11 @@ class Parser(metaclass=_Parser): this = exp.Literal.string(parts[0]) unit = self.expression(exp.Var, this=parts[1].upper()) + if self.INTERVAL_SPANS and self._match_text_seq("TO"): + unit = self.expression( + exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) + ) + return self.expression(exp.Interval, this=this, unit=unit) def _parse_bitwise(self) -> t.Optional[exp.Expression]: @@ -3783,6 +3980,9 @@ class Parser(metaclass=_Parser): if not this: return None + if isinstance(this, exp.Column) and not this.table: + this = exp.var(this.name.upper()) + return self.expression( exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) ) @@ -3900,19 +4100,14 @@ class Parser(metaclass=_Parser): elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): maybe_func = False elif type_token == TokenType.INTERVAL: - unit = self._parse_var() - - if self._match_text_seq("TO"): - span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] - else: - span = None + unit = self._parse_var(upper=True) + if unit: + if self._match_text_seq("TO"): + unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) - if span or not unit: - this = self.expression( - exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span - ) - else: this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) + else: + this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) if maybe_func and check_func: index2 = self._index @@ -3996,11 +4191,20 @@ class Parser(metaclass=_Parser): else: field = self._parse_field(anonymous_func=True, any_token=True) - if isinstance(field, exp.Func): + if isinstance(field, exp.Func) and this: # bigquery allows function calls like x.y.count(...) # SAFE.SUBSTR(...) # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules - this = self._replace_columns_with_dots(this) + this = exp.replace_tree( + this, + lambda n: ( + self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) + if n.table + else n.this + ) + if isinstance(n, exp.Column) + else n, + ) if op: this = op(self, this, field) @@ -4050,10 +4254,14 @@ class Parser(metaclass=_Parser): this = self._parse_set_operations( self._parse_subquery(this=this, parse_alias=False) ) + elif isinstance(this, exp.Subquery): + this = self._parse_subquery( + this=self._parse_set_operations(this), parse_alias=False + ) elif len(expressions) > 1: this = self.expression(exp.Tuple, expressions=expressions) else: - this = self.expression(exp.Paren, this=self._parse_set_operations(this)) + this = self.expression(exp.Paren, this=this) if this: this.add_comments(comments) @@ -4118,7 +4326,7 @@ class Parser(metaclass=_Parser): parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: self._advance() - return parser(self) + return self._parse_window(parser(self)) if not self._next or self._next.token_type != TokenType.L_PAREN: if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: @@ -4186,7 +4394,7 @@ class Parser(metaclass=_Parser): if not isinstance(e, exp.PropertyEQ): e = self.expression( - exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression + exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression ) if isinstance(e.this, exp.Column): @@ -4267,19 +4475,15 @@ class Parser(metaclass=_Parser): def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: index = self._index - if not self.errors: - try: - if self._parse_select(nested=True): - return this - except ParseError: - pass - finally: - self.errors.clear() - self._retreat(index) - if not self._match(TokenType.L_PAREN): return this + # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), + # expr can be of both types + if self._match_set(self.SELECT_START_TOKENS): + self._retreat(index) + return this + args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) self._match_r_paren() @@ -4300,7 +4504,7 @@ class Parser(metaclass=_Parser): constraints: t.List[exp.Expression] = [] - if not kind and self._match(TokenType.ALIAS): + if (not kind and self._match(TokenType.ALIAS)) or self._match_text_seq("ALIAS"): constraints.append( self.expression( exp.ComputedColumnConstraint, @@ -4417,9 +4621,7 @@ class Parser(metaclass=_Parser): self._match_text_seq("LENGTH") return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) - def _parse_not_constraint( - self, - ) -> t.Optional[exp.Expression]: + def _parse_not_constraint(self) -> t.Optional[exp.Expression]: if self._match_text_seq("NULL"): return self.expression(exp.NotNullColumnConstraint) if self._match_text_seq("CASESPECIFIC"): @@ -4447,16 +4649,21 @@ class Parser(metaclass=_Parser): if not self._match(TokenType.CONSTRAINT): return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) - this = self._parse_id_var() - expressions = [] + return self.expression( + exp.Constraint, + this=self._parse_id_var(), + expressions=self._parse_unnamed_constraints(), + ) + def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: + constraints = [] while True: constraint = self._parse_unnamed_constraint() or self._parse_function() if not constraint: break - expressions.append(constraint) + constraints.append(constraint) - return self.expression(exp.Constraint, this=this, expressions=expressions) + return constraints def _parse_unnamed_constraint( self, constraints: t.Optional[t.Collection[str]] = None @@ -4478,6 +4685,7 @@ class Parser(metaclass=_Parser): exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)), index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, + on_conflict=self._parse_on_conflict(), ) def _parse_key_constraint_options(self) -> t.List[str]: @@ -4592,7 +4800,7 @@ class Parser(metaclass=_Parser): def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) - def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): return this @@ -4601,9 +4809,9 @@ class Parser(metaclass=_Parser): lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) ) - if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: + if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): self.raise_error("Expected ]") - elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: + elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): self.raise_error("Expected }") # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs @@ -4645,8 +4853,8 @@ class Parser(metaclass=_Parser): else: self.raise_error("Expected END after CASE", self._prev) - return self._parse_window( - self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) + return self.expression( + exp.Case, comments=comments, this=expression, ifs=ifs, default=default ) def _parse_if(self) -> t.Optional[exp.Expression]: @@ -4672,7 +4880,7 @@ class Parser(metaclass=_Parser): self._match(TokenType.END) this = self.expression(exp.If, this=condition, true=true, false=false) - return self._parse_window(this) + return this def _parse_next_value_for(self) -> t.Optional[exp.Expression]: if not self._match_text_seq("VALUE", "FOR"): @@ -4739,7 +4947,12 @@ class Parser(metaclass=_Parser): to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) return self.expression( - exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe + exp.Cast if strict else exp.TryCast, + this=this, + to=to, + format=fmt, + safe=safe, + action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), ) def _parse_string_agg(self) -> exp.Expression: @@ -5087,6 +5300,9 @@ class Parser(metaclass=_Parser): def _parse_window( self, this: t.Optional[exp.Expression], alias: bool = False ) -> t.Optional[exp.Expression]: + func = this + comments = func.comments if isinstance(func, exp.Expression) else None + if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): self._match(TokenType.WHERE) this = self.expression( @@ -5132,9 +5348,16 @@ class Parser(metaclass=_Parser): else: over = self._prev.text.upper() + if comments: + func.comments = None # type: ignore + if not self._match(TokenType.L_PAREN): return self.expression( - exp.Window, this=this, alias=self._parse_id_var(False), over=over + exp.Window, + comments=comments, + this=this, + alias=self._parse_id_var(False), + over=over, ) window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) @@ -5167,6 +5390,7 @@ class Parser(metaclass=_Parser): window = self.expression( exp.Window, + comments=comments, this=this, partition_by=partition, order=order, @@ -5218,7 +5442,7 @@ class Parser(metaclass=_Parser): self._match_r_paren(aliases) return aliases - alias = self._parse_id_var(any_token) or ( + alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( self.STRING_ALIASES and self._parse_string_as_identifier() ) @@ -5512,10 +5736,11 @@ class Parser(metaclass=_Parser): return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) self._match_text_seq("SET", "DATA") + self._match_text_seq("TYPE") return self.expression( exp.AlterColumn, this=column, - dtype=self._match_text_seq("TYPE") and self._parse_types(), + dtype=self._parse_types(), collate=self._match(TokenType.COLLATE) and self._parse_term(), using=self._match(TokenType.USING) and self._parse_conjunction(), ) @@ -5919,26 +6144,6 @@ class Parser(metaclass=_Parser): return True - @t.overload - def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... - - @t.overload - def _replace_columns_with_dots( - self, this: t.Optional[exp.Expression] - ) -> t.Optional[exp.Expression]: ... - - def _replace_columns_with_dots(self, this): - if isinstance(this, exp.Dot): - exp.replace_children(this, self._replace_columns_with_dots) - elif isinstance(this, exp.Column): - exp.replace_children(this, self._replace_columns_with_dots) - table = this.args.get("table") - this = ( - self.expression(exp.Dot, this=table, expression=this.this) if table else this.this - ) - - return this - def _replace_lambda( self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] ) -> t.Optional[exp.Expression]: @@ -6011,3 +6216,13 @@ class Parser(metaclass=_Parser): option=option, partition=partition, ) + + def _parse_with_operator(self) -> t.Optional[exp.Expression]: + this = self._parse_ordered(self._parse_opclass) + + if not self._match(TokenType.WITH): + return this + + op = self._parse_var(any_token=True) + + return self.expression(exp.WithOperator, this=this, op=op) |