author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-02-03 06:02:47 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-02-03 06:02:47 +0000
commit     e67dc36ad88f4bbf01ffb495fe2ae846424ac015 (patch)
tree       4d0c88b54afb1aafaa01ace15650affa6f436195 /sqlglot/parser.py
parent     Adding upstream version 10.5.10. (diff)
download   sqlglot-e67dc36ad88f4bbf01ffb495fe2ae846424ac015.tar.xz
           sqlglot-e67dc36ad88f4bbf01ffb495fe2ae846424ac015.zip
Adding upstream version 10.6.0. (tag: upstream/10.6.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--  sqlglot/parser.py  499
1 file changed, 396 insertions(+), 103 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 42777d1..6229105 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -175,13 +175,9 @@ class Parser(metaclass=_Parser):
         TokenType.DEFAULT,
         TokenType.DELETE,
         TokenType.DESCRIBE,
-        TokenType.DETERMINISTIC,
         TokenType.DIV,
-        TokenType.DISTKEY,
-        TokenType.DISTSTYLE,
         TokenType.END,
         TokenType.EXECUTE,
-        TokenType.ENGINE,
         TokenType.ESCAPE,
         TokenType.FALSE,
         TokenType.FIRST,
@@ -194,13 +190,10 @@ class Parser(metaclass=_Parser):
         TokenType.IF,
         TokenType.INDEX,
         TokenType.ISNULL,
-        TokenType.IMMUTABLE,
         TokenType.INTERVAL,
         TokenType.LAZY,
-        TokenType.LANGUAGE,
         TokenType.LEADING,
         TokenType.LOCAL,
-        TokenType.LOCATION,
         TokenType.MATERIALIZED,
         TokenType.MERGE,
         TokenType.NATURAL,
@@ -209,13 +202,11 @@ class Parser(metaclass=_Parser):
         TokenType.ONLY,
         TokenType.OPTIONS,
         TokenType.ORDINALITY,
-        TokenType.PARTITIONED_BY,
         TokenType.PERCENT,
         TokenType.PIVOT,
         TokenType.PRECEDING,
         TokenType.RANGE,
         TokenType.REFERENCES,
-        TokenType.RETURNS,
         TokenType.ROW,
         TokenType.ROWS,
         TokenType.SCHEMA,
@@ -225,10 +216,7 @@ class Parser(metaclass=_Parser):
         TokenType.SET,
         TokenType.SHOW,
         TokenType.SORTKEY,
-        TokenType.STABLE,
-        TokenType.STORED,
         TokenType.TABLE,
-        TokenType.TABLE_FORMAT,
         TokenType.TEMPORARY,
         TokenType.TOP,
         TokenType.TRAILING,
@@ -237,7 +225,6 @@ class Parser(metaclass=_Parser):
         TokenType.UNIQUE,
         TokenType.UNLOGGED,
         TokenType.UNPIVOT,
-        TokenType.PROPERTIES,
         TokenType.PROCEDURE,
         TokenType.VIEW,
         TokenType.VOLATILE,
@@ -448,7 +435,12 @@ class Parser(metaclass=_Parser):
         TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
         TokenType.UNCACHE: lambda self: self._parse_uncache(),
         TokenType.UPDATE: lambda self: self._parse_update(),
-        TokenType.USE: lambda self: self.expression(exp.Use, this=self._parse_id_var()),
+        TokenType.USE: lambda self: self.expression(
+            exp.Use,
+            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
+            and exp.Var(this=self._prev.text),
+            this=self._parse_table(schema=False),
+        ),
     }
 
     UNARY_PARSERS = {
@@ -492,6 +484,9 @@ class Parser(metaclass=_Parser):
 
     RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
+        TokenType.GLOB: lambda self, this: self._parse_escape(
+            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
+        ),
         TokenType.IN: lambda self, this: self._parse_in(this),
         TokenType.IS: lambda self, this: self._parse_is(this),
         TokenType.LIKE: lambda self, this: self._parse_escape(
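
Two behavioral changes land in the hunks above: USE now records which kind of object is selected (ROLE, WAREHOUSE, DATABASE, SCHEMA, as in Snowflake) and parses its target as a table reference rather than a bare identifier, and SQLite-style GLOB becomes a first-class range expression. A minimal probe of the new behavior, sketched against a sqlglot 10.6.x install:

    import sqlglot
    from sqlglot import exp

    # The USE kind is stored as an exp.Var on the Use node.
    use = sqlglot.parse_one("USE WAREHOUSE compute_wh", read="snowflake")
    print(use.sql())  # expected round-trip: USE WAREHOUSE compute_wh

    # GLOB now produces its own expression node.
    glob = sqlglot.parse_one("SELECT * FROM files WHERE name GLOB '*.csv'")
    print(repr(glob.find(exp.Glob)))
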
@@ -512,45 +507,66 @@ class Parser(metaclass=_Parser):
     }
 
     PROPERTY_PARSERS = {
-        TokenType.AUTO_INCREMENT: lambda self: self._parse_property_assignment(
-            exp.AutoIncrementProperty
-        ),
-        TokenType.CHARACTER_SET: lambda self: self._parse_character_set(),
-        TokenType.LOCATION: lambda self: self._parse_property_assignment(exp.LocationProperty),
-        TokenType.PARTITIONED_BY: lambda self: self._parse_partitioned_by(),
-        TokenType.SCHEMA_COMMENT: lambda self: self._parse_property_assignment(
-            exp.SchemaCommentProperty
-        ),
-        TokenType.STORED: lambda self: self._parse_property_assignment(exp.FileFormatProperty),
-        TokenType.DISTKEY: lambda self: self._parse_distkey(),
-        TokenType.DISTSTYLE: lambda self: self._parse_property_assignment(exp.DistStyleProperty),
-        TokenType.SORTKEY: lambda self: self._parse_sortkey(),
-        TokenType.LIKE: lambda self: self._parse_create_like(),
-        TokenType.RETURNS: lambda self: self._parse_returns(),
-        TokenType.ROW: lambda self: self._parse_row(),
-        TokenType.COLLATE: lambda self: self._parse_property_assignment(exp.CollateProperty),
-        TokenType.COMMENT: lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
-        TokenType.FORMAT: lambda self: self._parse_property_assignment(exp.FileFormatProperty),
-        TokenType.TABLE_FORMAT: lambda self: self._parse_property_assignment(
-            exp.TableFormatProperty
-        ),
-        TokenType.USING: lambda self: self._parse_property_assignment(exp.TableFormatProperty),
-        TokenType.LANGUAGE: lambda self: self._parse_property_assignment(exp.LanguageProperty),
-        TokenType.EXECUTE: lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
-        TokenType.DETERMINISTIC: lambda self: self.expression(
+        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
+        "CHARACTER SET": lambda self: self._parse_character_set(),
+        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
+        "PARTITION BY": lambda self: self._parse_partitioned_by(),
+        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
+        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
+        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
+        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+        "DISTKEY": lambda self: self._parse_distkey(),
+        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
+        "SORTKEY": lambda self: self._parse_sortkey(),
+        "LIKE": lambda self: self._parse_create_like(),
+        "RETURNS": lambda self: self._parse_returns(),
+        "ROW": lambda self: self._parse_row(),
+        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
+        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
+        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
+        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
+        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
+        "DETERMINISTIC": lambda self: self.expression(
             exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
         ),
-        TokenType.IMMUTABLE: lambda self: self.expression(
+        "IMMUTABLE": lambda self: self.expression(
             exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
         ),
-        TokenType.STABLE: lambda self: self.expression(
+        "STABLE": lambda self: self.expression(
             exp.VolatilityProperty, this=exp.Literal.string("STABLE")
         ),
-        TokenType.VOLATILE: lambda self: self.expression(
+        "VOLATILE": lambda self: self.expression(
             exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
         ),
-        TokenType.WITH: lambda self: self._parse_wrapped_csv(self._parse_property),
-        TokenType.PROPERTIES: lambda self: self._parse_wrapped_csv(self._parse_property),
+        "WITH": lambda self: self._parse_with_property(),
+        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
+        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
+        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
+        "BEFORE": lambda self: self._parse_journal(
+            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
+        ),
+        "JOURNAL": lambda self: self._parse_journal(
+            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
+        ),
+        "AFTER": lambda self: self._parse_afterjournal(
+            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
+        ),
+        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
+        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
+        "CHECKSUM": lambda self: self._parse_checksum(),
+        "FREESPACE": lambda self: self._parse_freespace(),
+        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
+            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
+        ),
+        "MIN": lambda self: self._parse_datablocksize(),
+        "MINIMUM": lambda self: self._parse_datablocksize(),
+        "MAX": lambda self: self._parse_datablocksize(),
+        "MAXIMUM": lambda self: self._parse_datablocksize(),
+        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
+            default=self._prev.text.upper() == "DEFAULT"
+        ),
+        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
     }
 
     CONSTRAINT_PARSERS = {
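
PROPERTY_PARSERS is now keyed by upper-cased keyword text instead of TokenType, which is why the dedicated token types (DETERMINISTIC, DISTKEY, LOCATION, and friends) could be dropped from the identifier lists above, and why Teradata keywords like FALLBACK and BLOCKCOMPRESSION slot in without tokenizer changes. A sketch of how a dialect parser can now register a property; the ENGINE entry mirrors what a MySQL-style dialect registers, but the subclass itself is hypothetical:

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):  # hypothetical dialect parser
        PROPERTY_PARSERS = {
            **Parser.PROPERTY_PARSERS,
            # Keys are matched against upper-cased token text, so no new
            # TokenType is needed for a dialect-specific property keyword.
            "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        }
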
@@ -580,6 +596,7 @@ class Parser(metaclass=_Parser):
     }
 
     QUERY_MODIFIER_PARSERS = {
+        "match": lambda self: self._parse_match_recognize(),
         "where": lambda self: self._parse_where(),
         "group": lambda self: self._parse_group(),
         "having": lambda self: self._parse_having(),
@@ -627,7 +644,6 @@ class Parser(metaclass=_Parser):
         "max_errors",
         "null_ordering",
         "_tokens",
-        "_chunks",
         "_index",
         "_curr",
         "_next",
@@ -660,7 +676,6 @@ class Parser(metaclass=_Parser):
         self.sql = ""
         self.errors = []
         self._tokens = []
-        self._chunks = [[]]
         self._index = 0
         self._curr = None
         self._next = None
@@ -728,17 +743,18 @@ class Parser(metaclass=_Parser):
         self.reset()
         self.sql = sql or ""
         total = len(raw_tokens)
+        chunks: t.List[t.List[Token]] = [[]]
 
         for i, token in enumerate(raw_tokens):
             if token.token_type == TokenType.SEMICOLON:
                 if i < total - 1:
-                    self._chunks.append([])
+                    chunks.append([])
             else:
-                self._chunks[-1].append(token)
+                chunks[-1].append(token)
 
         expressions = []
 
-        for tokens in self._chunks:
+        for tokens in chunks:
             self._index = -1
             self._tokens = tokens
             self._advance()
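
Statement chunking (splitting on top-level semicolons) now builds the chunk list in a local variable instead of the _chunks slot, so stale chunks can no longer leak between parse() calls on a reused Parser. The observable API is unchanged, for example:

    import sqlglot

    # Each semicolon-separated statement becomes its own expression tree.
    for statement in sqlglot.parse("SELECT 1; SELECT 2"):
        print(statement.sql())
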
""" token = token or self._curr or self._prev or Token.string("") - start = self._find_token(token, self.sql) + start = self._find_token(token) end = start + len(token.text) start_context = self.sql[max(start - self.error_message_context, 0) : start] highlight = self.sql[start:end] @@ -833,13 +849,16 @@ class Parser(metaclass=_Parser): for error_message in expression.error_messages(args): self.raise_error(error_message) - def _find_token(self, token: Token, sql: str) -> int: + def _find_sql(self, start: Token, end: Token) -> str: + return self.sql[self._find_token(start) : self._find_token(end)] + + def _find_token(self, token: Token) -> int: line = 1 col = 1 index = 0 while line < token.line or col < token.col: - if Tokenizer.WHITE_SPACE.get(sql[index]) == TokenType.BREAK: + if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: line += 1 col = 1 else: @@ -911,6 +930,10 @@ class Parser(metaclass=_Parser): def _parse_create(self) -> t.Optional[exp.Expression]: replace = self._match_pair(TokenType.OR, TokenType.REPLACE) + set_ = self._match(TokenType.SET) # Teradata + multiset = self._match_text_seq("MULTISET") # Teradata + global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY") # Teradata + volatile = self._match(TokenType.VOLATILE) # Teradata temporary = self._match(TokenType.TEMPORARY) transient = self._match_text_seq("TRANSIENT") external = self._match_text_seq("EXTERNAL") @@ -954,10 +977,18 @@ class Parser(metaclass=_Parser): TokenType.VIEW, TokenType.SCHEMA, ): - this = self._parse_table(schema=True) - properties = self._parse_properties() - if self._match(TokenType.ALIAS): - expression = self._parse_ddl_select() + table_parts = self._parse_table_parts(schema=True) + + if self._match(TokenType.COMMA): # comma-separated properties before schema definition + properties = self._parse_properties(before=True) + + this = self._parse_schema(this=table_parts) + + if not properties: # properties after schema definition + properties = self._parse_properties() + + self._match(TokenType.ALIAS) + expression = self._parse_ddl_select() if create_token.token_type == TokenType.TABLE: if self._match_text_seq("WITH", "DATA"): @@ -988,6 +1019,10 @@ class Parser(metaclass=_Parser): this=this, kind=create_token.text, expression=expression, + set=set_, + multiset=multiset, + global_temporary=global_temporary, + volatile=volatile, exists=exists, properties=properties, temporary=temporary, @@ -1004,9 +1039,19 @@ class Parser(metaclass=_Parser): begin=begin, ) + def _parse_property_before(self) -> t.Optional[exp.Expression]: + self._match_text_seq("NO") + self._match_text_seq("DUAL") + self._match_text_seq("DEFAULT") + + if self.PROPERTY_PARSERS.get(self._curr.text.upper()): + return self.PROPERTY_PARSERS[self._curr.text.upper()](self) + + return None + def _parse_property(self) -> t.Optional[exp.Expression]: - if self._match_set(self.PROPERTY_PARSERS): - return self.PROPERTY_PARSERS[self._prev.token_type](self) + if self._match_texts(self.PROPERTY_PARSERS): + return self.PROPERTY_PARSERS[self._prev.text.upper()](self) if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): return self._parse_character_set(True) @@ -1033,6 +1078,166 @@ class Parser(metaclass=_Parser): this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), ) + def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: + properties = [] + + while True: + if before: + self._match(TokenType.COMMA) + identified_property = self._parse_property_before() + else: + identified_property = 
@@ -1033,6 +1078,166 @@ class Parser(metaclass=_Parser):
             this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
         )
 
+    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
+        properties = []
+
+        while True:
+            if before:
+                self._match(TokenType.COMMA)
+                identified_property = self._parse_property_before()
+            else:
+                identified_property = self._parse_property()
+
+            if not identified_property:
+                break
+            for p in ensure_collection(identified_property):
+                properties.append(p)
+
+        if properties:
+            return self.expression(exp.Properties, expressions=properties, before=before)
+
+        return None
+
+    def _parse_fallback(self, no=False) -> exp.Expression:
+        self._match_text_seq("FALLBACK")
+        return self.expression(
+            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
+        )
+
+    def _parse_with_property(
+        self,
+    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
+        if self._match(TokenType.L_PAREN, advance=False):
+            return self._parse_wrapped_csv(self._parse_property)
+
+        if not self._next:
+            return None
+
+        if self._next.text.upper() == "JOURNAL":
+            return self._parse_withjournaltable()
+
+        return self._parse_withisolatedloading()
+
+    def _parse_withjournaltable(self) -> exp.Expression:
+        self._match_text_seq("WITH", "JOURNAL", "TABLE")
+        self._match(TokenType.EQ)
+        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
+
+    def _parse_log(self, no=False) -> exp.Expression:
+        self._match_text_seq("LOG")
+        return self.expression(exp.LogProperty, no=no)
+
+    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
+        before = self._match_text_seq("BEFORE")
+        self._match_text_seq("JOURNAL")
+        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
+
+    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
+        self._match_text_seq("NOT")
+        self._match_text_seq("LOCAL")
+        self._match_text_seq("AFTER", "JOURNAL")
+        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
+
+    def _parse_checksum(self) -> exp.Expression:
+        self._match_text_seq("CHECKSUM")
+        self._match(TokenType.EQ)
+
+        on = None
+        if self._match(TokenType.ON):
+            on = True
+        elif self._match_text_seq("OFF"):
+            on = False
+        default = self._match(TokenType.DEFAULT)
+
+        return self.expression(
+            exp.ChecksumProperty,
+            on=on,
+            default=default,
+        )
+
+    def _parse_freespace(self) -> exp.Expression:
+        self._match_text_seq("FREESPACE")
+        self._match(TokenType.EQ)
+        return self.expression(
+            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
+        )
+
+    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
+        self._match_text_seq("MERGEBLOCKRATIO")
+        if self._match(TokenType.EQ):
+            return self.expression(
+                exp.MergeBlockRatioProperty,
+                this=self._parse_number(),
+                percent=self._match(TokenType.PERCENT),
+            )
+        else:
+            return self.expression(
+                exp.MergeBlockRatioProperty,
+                no=no,
+                default=default,
+            )
+
+    def _parse_datablocksize(self, default=None) -> exp.Expression:
+        if default:
+            self._match_text_seq("DATABLOCKSIZE")
+            return self.expression(exp.DataBlocksizeProperty, default=True)
+        elif self._match_texts(("MIN", "MINIMUM")):
+            self._match_text_seq("DATABLOCKSIZE")
+            return self.expression(exp.DataBlocksizeProperty, min=True)
+        elif self._match_texts(("MAX", "MAXIMUM")):
+            self._match_text_seq("DATABLOCKSIZE")
+            return self.expression(exp.DataBlocksizeProperty, min=False)
+
+        self._match_text_seq("DATABLOCKSIZE")
+        self._match(TokenType.EQ)
+        size = self._parse_number()
+        units = None
+        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
+            units = self._prev.text
+        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
+
+    def _parse_blockcompression(self) -> exp.Expression:
+        self._match_text_seq("BLOCKCOMPRESSION")
+        self._match(TokenType.EQ)
+        always = self._match(TokenType.ALWAYS)
+        manual = self._match_text_seq("MANUAL")
+        never = self._match_text_seq("NEVER")
+        default = self._match_text_seq("DEFAULT")
+        autotemp = None
+        if self._match_text_seq("AUTOTEMP"):
+            autotemp = self._parse_schema()
+
+        return self.expression(
+            exp.BlockCompressionProperty,
+            always=always,
+            manual=manual,
+            never=never,
+            default=default,
+            autotemp=autotemp,
+        )
+
+    def _parse_withisolatedloading(self) -> exp.Expression:
+        self._match(TokenType.WITH)
+        no = self._match_text_seq("NO")
+        concurrent = self._match_text_seq("CONCURRENT")
+        self._match_text_seq("ISOLATED", "LOADING")
+        for_all = self._match_text_seq("FOR", "ALL")
+        for_insert = self._match_text_seq("FOR", "INSERT")
+        for_none = self._match_text_seq("FOR", "NONE")
+        return self.expression(
+            exp.IsolatedLoadingProperty,
+            no=no,
+            concurrent=concurrent,
+            for_all=for_all,
+            for_insert=for_insert,
+            for_none=for_none,
+        )
+
+    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
+        if self._match(TokenType.PARTITION_BY):
+            return self._parse_csv(self._parse_conjunction)
+        return []
+
     def _parse_partitioned_by(self) -> exp.Expression:
         self._match(TokenType.EQ)
         return self.expression(
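
Each helper above consumes one Teradata physical-table property and yields a dedicated node (FallbackProperty, JournalProperty, ChecksumProperty, DataBlocksizeProperty, BlockCompressionProperty, and so on). An illustrative sketch, again assuming the Teradata dialect:

    import sqlglot

    sql = (
        "CREATE TABLE t, CHECKSUM = ON, DATABLOCKSIZE = 16384 BYTES, "
        "MERGEBLOCKRATIO = 50 PERCENT (x INT)"
    )
    create = sqlglot.parse_one(sql, read="teradata")
    for prop in create.args["properties"].expressions:
        print(type(prop).__name__)  # ChecksumProperty, DataBlocksizeProperty, ...
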
@@ -1093,21 +1298,6 @@ class Parser(metaclass=_Parser):
 
         return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
 
-    def _parse_properties(self) -> t.Optional[exp.Expression]:
-        properties = []
-
-        while True:
-            identified_property = self._parse_property()
-            if not identified_property:
-                break
-            for p in ensure_collection(identified_property):
-                properties.append(p)
-
-        if properties:
-            return self.expression(exp.Properties, expressions=properties)
-
-        return None
-
     def _parse_describe(self) -> exp.Expression:
         kind = self._match_set(self.CREATABLES) and self._prev.text
         this = self._parse_table()
@@ -1248,11 +1438,9 @@ class Parser(metaclass=_Parser):
         if not self._match(TokenType.PARTITION):
             return None
 
-        def parse_values() -> exp.Property:
-            props = self._parse_csv(self._parse_var_or_string, sep=TokenType.EQ)
-            return exp.Property(this=seq_get(props, 0), value=seq_get(props, 1))
-
-        return self.expression(exp.Partition, this=self._parse_wrapped_csv(parse_values))
+        return self.expression(
+            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
+        )
 
     def _parse_value(self) -> exp.Expression:
         if self._match(TokenType.L_PAREN):
@@ -1360,8 +1548,7 @@ class Parser(metaclass=_Parser):
         if not alias or not alias.this:
             self.raise_error("Expected CTE to have alias")
 
-        if not self._match(TokenType.ALIAS):
-            self.raise_error("Expected AS in CTE")
+        self._match(TokenType.ALIAS)
 
         return self.expression(
             exp.CTE,
@@ -1376,10 +1563,11 @@ class Parser(metaclass=_Parser):
         alias = self._parse_id_var(
             any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
         )
+        index = self._index
 
         if self._match(TokenType.L_PAREN):
             columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var()))
-            self._match_r_paren()
+            self._match_r_paren() if columns else self._retreat(index)
         else:
             columns = None
 
@@ -1452,6 +1640,87 @@ class Parser(metaclass=_Parser):
             exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
         )
 
+    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
+        if not self._match(TokenType.MATCH_RECOGNIZE):
+            return None
+        self._match_l_paren()
+
+        partition = self._parse_partition_by()
+        order = self._parse_order()
+        measures = (
+            self._parse_alias(self._parse_conjunction())
+            if self._match_text_seq("MEASURES")
+            else None
+        )
+
+        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
+            rows = exp.Var(this="ONE ROW PER MATCH")
+        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
+            text = "ALL ROWS PER MATCH"
+            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
+                text += f" SHOW EMPTY MATCHES"
+            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
+                text += f" OMIT EMPTY MATCHES"
+            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
+                text += f" WITH UNMATCHED ROWS"
+            rows = exp.Var(this=text)
+        else:
+            rows = None
+
+        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
+            text = "AFTER MATCH SKIP"
+            if self._match_text_seq("PAST", "LAST", "ROW"):
+                text += f" PAST LAST ROW"
+            elif self._match_text_seq("TO", "NEXT", "ROW"):
+                text += f" TO NEXT ROW"
+            elif self._match_text_seq("TO", "FIRST"):
+                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
+            elif self._match_text_seq("TO", "LAST"):
+                text += f" TO LAST {self._advance_any().text}"  # type: ignore
+            after = exp.Var(this=text)
+        else:
+            after = None
+
+        if self._match_text_seq("PATTERN"):
+            self._match_l_paren()
+
+            if not self._curr:
+                self.raise_error("Expecting )", self._curr)
+
+            paren = 1
+            start = self._curr
+
+            while self._curr and paren > 0:
+                if self._curr.token_type == TokenType.L_PAREN:
+                    paren += 1
+                if self._curr.token_type == TokenType.R_PAREN:
+                    paren -= 1
+                self._advance()
+            if paren > 0:
+                self.raise_error("Expecting )", self._curr)
+            if not self._curr:
+                self.raise_error("Expecting pattern", self._curr)
+            end = self._prev
+            pattern = exp.Var(this=self._find_sql(start, end))
+        else:
+            pattern = None
+
+        define = (
+            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
+        )
+        self._match_r_paren()
+
+        return self.expression(
+            exp.MatchRecognize,
+            partition_by=partition,
+            order=order,
+            measures=measures,
+            rows=rows,
+            after=after,
+            pattern=pattern,
+            define=define,
+        )
+
     def _parse_lateral(self) -> t.Optional[exp.Expression]:
         outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
         cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
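
_parse_match_recognize wires MATCH_RECOGNIZE (row pattern matching, as in Snowflake and Oracle) in as a query modifier; the PATTERN body is captured verbatim by balancing parentheses and slicing the original SQL with the new _find_sql helper. Note that MEASURES and DEFINE each accept a single aliased expression at this stage. A hedged round-trip sketch:

    import sqlglot

    sql = """
    SELECT * FROM ticker MATCH_RECOGNIZE (
      PARTITION BY symbol
      ORDER BY tstamp
      MEASURES MAX(price) AS max_price
      ONE ROW PER MATCH
      AFTER MATCH SKIP PAST LAST ROW
      PATTERN (STRT DOWN+ UP+)
      DEFINE dn AS is_down
    )
    """
    print(sqlglot.parse_one(sql).sql(pretty=True))
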
@@ -1772,12 +2041,19 @@ class Parser(metaclass=_Parser):
         if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
             return None
 
+        expressions = self._parse_csv(self._parse_conjunction)
+        grouping_sets = self._parse_grouping_sets()
+
+        with_ = self._match(TokenType.WITH)
+        cube = self._match(TokenType.CUBE) and (with_ or self._parse_wrapped_id_vars())
+        rollup = self._match(TokenType.ROLLUP) and (with_ or self._parse_wrapped_id_vars())
+
         return self.expression(
             exp.Group,
-            expressions=self._parse_csv(self._parse_conjunction),
-            grouping_sets=self._parse_grouping_sets(),
-            cube=self._match(TokenType.CUBE) and self._parse_wrapped_id_vars(),
-            rollup=self._match(TokenType.ROLLUP) and self._parse_wrapped_id_vars(),
+            expressions=expressions,
+            grouping_sets=grouping_sets,
+            cube=cube,
+            rollup=rollup,
         )
 
     def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
@@ -1788,11 +2064,11 @@ class Parser(metaclass=_Parser):
 
     def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
         if self._match(TokenType.L_PAREN):
-            grouping_set = self._parse_csv(self._parse_id_var)
+            grouping_set = self._parse_csv(self._parse_column)
             self._match_r_paren()
             return self.expression(exp.Tuple, expressions=grouping_set)
 
-        return self._parse_id_var()
+        return self._parse_column()
 
     def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
         if not skip_having_token and not self._match(TokenType.HAVING):
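
Hoisting the CUBE/ROLLUP parses out of the constructor call lets the bare MySQL-style WITH CUBE / WITH ROLLUP reuse the same fields as the parenthesized forms (stored as True rather than a column list), and grouping-set items may now be full column expressions instead of plain identifiers. For instance:

    import sqlglot

    select = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP")
    print(select.args["group"].args.get("rollup"))  # True (bare form, no column list)
    print(select.sql())
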
@@ -2268,7 +2544,6 @@ class Parser(metaclass=_Parser):
         args = self._parse_csv(self._parse_lambda)
 
         if function:
-
             # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
             # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
             if count_params(function) == 2:
@@ -2541,9 +2816,10 @@ class Parser(metaclass=_Parser):
         return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
 
     def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
-        if not self._match(TokenType.L_BRACKET):
+        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
             return this
 
+        bracket_kind = self._prev.token_type
         expressions: t.List[t.Optional[exp.Expression]]
 
         if self._match(TokenType.COLON):
@@ -2551,14 +2827,19 @@ class Parser(metaclass=_Parser):
         else:
             expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
 
-        if not this or this.name.upper() == "ARRAY":
+        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
+        if bracket_kind == TokenType.L_BRACE:
+            this = self.expression(exp.Struct, expressions=expressions)
+        elif not this or this.name.upper() == "ARRAY":
             this = self.expression(exp.Array, expressions=expressions)
         else:
             expressions = apply_index_offset(expressions, -self.index_offset)
             this = self.expression(exp.Bracket, this=this, expressions=expressions)
 
-        if not self._match(TokenType.R_BRACKET):
+        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
             self.raise_error("Expected ]")
+        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
+            self.raise_error("Expected }")
 
         this.comments = self._prev_comments
         return self._parse_bracket(this)
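
Accepting L_BRACE alongside L_BRACKET is what enables DuckDB's brace-delimited struct literals (per the DuckDB link in the code): braces map to exp.Struct while brackets keep their array/subscript meaning, each with a matching closing-delimiter error. For example:

    import sqlglot
    from sqlglot import exp

    select = sqlglot.parse_one("SELECT {'x': 1, 'y': 2} AS point", read="duckdb")
    print(repr(select.find(exp.Struct)))
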
@@ -2727,7 +3008,7 @@ class Parser(metaclass=_Parser):
             position = self._prev.text.upper()
 
         expression = self._parse_term()
-        if self._match(TokenType.FROM):
+        if self._match_set((TokenType.FROM, TokenType.COMMA)):
             this = self._parse_term()
         else:
             this = expression
@@ -2792,14 +3073,8 @@ class Parser(metaclass=_Parser):
             return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))
 
         window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
-
-        partition = None
-        if self._match(TokenType.PARTITION_BY):
-            partition = self._parse_csv(self._parse_conjunction)
-
+        partition = self._parse_partition_by()
         order = self._parse_order()
-
-        spec = None
         kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
 
         if kind:
@@ -2816,6 +3091,8 @@ class Parser(metaclass=_Parser):
                 end=end["value"],
                 end_side=end["side"],
             )
+        else:
+            spec = None
 
         self._match_r_paren()
 
@@ -3060,6 +3337,12 @@ class Parser(metaclass=_Parser):
     def _parse_drop_column(self) -> t.Optional[exp.Expression]:
         return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
 
+    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
+    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
+        return self.expression(
+            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
+        )
+
     def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
         this = None
         kind = self._prev.token_type
@@ -3092,14 +3375,24 @@ class Parser(metaclass=_Parser):
         actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None
 
         index = self._index
-        if self._match_text_seq("ADD"):
+        if self._match(TokenType.DELETE):
+            actions = [self.expression(exp.Delete, where=self._parse_where())]
+        elif self._match_text_seq("ADD"):
             if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                 actions = self._parse_csv(self._parse_add_constraint)
             else:
                 self._retreat(index)
                 actions = self._parse_csv(self._parse_add_column)
-        elif self._match_text_seq("DROP", advance=False):
-            actions = self._parse_csv(self._parse_drop_column)
+        elif self._match_text_seq("DROP"):
+            partition_exists = self._parse_exists()
+
+            if self._match(TokenType.PARTITION, advance=False):
+                actions = self._parse_csv(
+                    lambda: self._parse_drop_partition(exists=partition_exists)
+                )
+            else:
+                self._retreat(index)
+                actions = self._parse_csv(self._parse_drop_column)
         elif self._match_text_seq("RENAME", "TO"):
             actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
         elif self._match_text_seq("ALTER"):
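
ALTER TABLE gains two actions here: a bare DELETE with an optional WHERE (Teradata) and DROP [IF EXISTS] PARTITION (Athena/Hive), which routes into the new _parse_drop_partition above; a DROP not followed by PARTITION retreats and falls back to DROP COLUMN as before. A small probe, hedged on the exact output:

    import sqlglot

    alter = sqlglot.parse_one(
        "ALTER TABLE t DROP IF EXISTS PARTITION (dt = '2023-01-01')", read="hive"
    )
    print(alter.sql(dialect="hive"))
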