commit    e67dc36ad88f4bbf01ffb495fe2ae846424ac015 (patch)
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2023-02-03 06:02:47 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2023-02-03 06:02:47 +0000
tree      4d0c88b54afb1aafaa01ace15650affa6f436195  /sqlglot/parser.py
parent    Adding upstream version 10.5.10. (diff)
Adding upstream version 10.6.0. (upstream/10.6.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--  sqlglot/parser.py | 499
1 file changed, 396 insertions(+), 103 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 42777d1..6229105 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -175,13 +175,9 @@ class Parser(metaclass=_Parser):
TokenType.DEFAULT,
TokenType.DELETE,
TokenType.DESCRIBE,
- TokenType.DETERMINISTIC,
TokenType.DIV,
- TokenType.DISTKEY,
- TokenType.DISTSTYLE,
TokenType.END,
TokenType.EXECUTE,
- TokenType.ENGINE,
TokenType.ESCAPE,
TokenType.FALSE,
TokenType.FIRST,
@@ -194,13 +190,10 @@ class Parser(metaclass=_Parser):
TokenType.IF,
TokenType.INDEX,
TokenType.ISNULL,
- TokenType.IMMUTABLE,
TokenType.INTERVAL,
TokenType.LAZY,
- TokenType.LANGUAGE,
TokenType.LEADING,
TokenType.LOCAL,
- TokenType.LOCATION,
TokenType.MATERIALIZED,
TokenType.MERGE,
TokenType.NATURAL,
@@ -209,13 +202,11 @@ class Parser(metaclass=_Parser):
TokenType.ONLY,
TokenType.OPTIONS,
TokenType.ORDINALITY,
- TokenType.PARTITIONED_BY,
TokenType.PERCENT,
TokenType.PIVOT,
TokenType.PRECEDING,
TokenType.RANGE,
TokenType.REFERENCES,
- TokenType.RETURNS,
TokenType.ROW,
TokenType.ROWS,
TokenType.SCHEMA,
@@ -225,10 +216,7 @@ class Parser(metaclass=_Parser):
TokenType.SET,
TokenType.SHOW,
TokenType.SORTKEY,
- TokenType.STABLE,
- TokenType.STORED,
TokenType.TABLE,
- TokenType.TABLE_FORMAT,
TokenType.TEMPORARY,
TokenType.TOP,
TokenType.TRAILING,
@@ -237,7 +225,6 @@ class Parser(metaclass=_Parser):
TokenType.UNIQUE,
TokenType.UNLOGGED,
TokenType.UNPIVOT,
- TokenType.PROPERTIES,
TokenType.PROCEDURE,
TokenType.VIEW,
TokenType.VOLATILE,
@@ -448,7 +435,12 @@ class Parser(metaclass=_Parser):
TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
TokenType.UNCACHE: lambda self: self._parse_uncache(),
TokenType.UPDATE: lambda self: self._parse_update(),
- TokenType.USE: lambda self: self.expression(exp.Use, this=self._parse_id_var()),
+ TokenType.USE: lambda self: self.expression(
+ exp.Use,
+ kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
+ and exp.Var(this=self._prev.text),
+ this=self._parse_table(schema=False),
+ ),
}
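The rewritten USE entry records an optional kind (ROLE, WAREHOUSE, DATABASE, SCHEMA) as an exp.Var and reads the target through _parse_table, so qualified names survive. A minimal sketch of the intended behavior, assuming sqlglot 10.6.0's public API (the SQL inputs are illustrative):

import sqlglot

# "USE WAREHOUSE x" should keep WAREHOUSE as the kind instead of treating it
# as the target; qualified targets such as db.schema now parse as well.
print(repr(sqlglot.parse_one("USE WAREHOUSE my_wh")))
print(repr(sqlglot.parse_one("USE my_db.my_schema")))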
UNARY_PARSERS = {
@@ -492,6 +484,9 @@ class Parser(metaclass=_Parser):
RANGE_PARSERS = {
TokenType.BETWEEN: lambda self, this: self._parse_between(this),
+ TokenType.GLOB: lambda self, this: self._parse_escape(
+ self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
+ ),
TokenType.IN: lambda self, this: self._parse_in(this),
TokenType.IS: lambda self, this: self._parse_is(this),
TokenType.LIKE: lambda self, this: self._parse_escape(
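The new GLOB entry above treats GLOB like LIKE, including the optional ESCAPE handling. A hedged sketch, assuming the base tokenizer already emits TokenType.GLOB for the keyword:

import sqlglot
from sqlglot import exp

# SQLite/DuckDB-style GLOB should now parse as a binary predicate (exp.Glob).
tree = sqlglot.parse_one("SELECT * FROM files WHERE name GLOB '*.py'")
print(repr(tree.find(exp.Glob)))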
@@ -512,45 +507,66 @@ class Parser(metaclass=_Parser):
}
PROPERTY_PARSERS = {
- TokenType.AUTO_INCREMENT: lambda self: self._parse_property_assignment(
- exp.AutoIncrementProperty
- ),
- TokenType.CHARACTER_SET: lambda self: self._parse_character_set(),
- TokenType.LOCATION: lambda self: self._parse_property_assignment(exp.LocationProperty),
- TokenType.PARTITIONED_BY: lambda self: self._parse_partitioned_by(),
- TokenType.SCHEMA_COMMENT: lambda self: self._parse_property_assignment(
- exp.SchemaCommentProperty
- ),
- TokenType.STORED: lambda self: self._parse_property_assignment(exp.FileFormatProperty),
- TokenType.DISTKEY: lambda self: self._parse_distkey(),
- TokenType.DISTSTYLE: lambda self: self._parse_property_assignment(exp.DistStyleProperty),
- TokenType.SORTKEY: lambda self: self._parse_sortkey(),
- TokenType.LIKE: lambda self: self._parse_create_like(),
- TokenType.RETURNS: lambda self: self._parse_returns(),
- TokenType.ROW: lambda self: self._parse_row(),
- TokenType.COLLATE: lambda self: self._parse_property_assignment(exp.CollateProperty),
- TokenType.COMMENT: lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
- TokenType.FORMAT: lambda self: self._parse_property_assignment(exp.FileFormatProperty),
- TokenType.TABLE_FORMAT: lambda self: self._parse_property_assignment(
- exp.TableFormatProperty
- ),
- TokenType.USING: lambda self: self._parse_property_assignment(exp.TableFormatProperty),
- TokenType.LANGUAGE: lambda self: self._parse_property_assignment(exp.LanguageProperty),
- TokenType.EXECUTE: lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
- TokenType.DETERMINISTIC: lambda self: self.expression(
+ "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
+ "CHARACTER SET": lambda self: self._parse_character_set(),
+ "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
+ "PARTITION BY": lambda self: self._parse_partitioned_by(),
+ "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
+ "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
+ "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
+ "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+ "DISTKEY": lambda self: self._parse_distkey(),
+ "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
+ "SORTKEY": lambda self: self._parse_sortkey(),
+ "LIKE": lambda self: self._parse_create_like(),
+ "RETURNS": lambda self: self._parse_returns(),
+ "ROW": lambda self: self._parse_row(),
+ "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
+ "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+ "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
+ "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
+ "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
+ "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
+ "DETERMINISTIC": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
),
- TokenType.IMMUTABLE: lambda self: self.expression(
+ "IMMUTABLE": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
),
- TokenType.STABLE: lambda self: self.expression(
+ "STABLE": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("STABLE")
),
- TokenType.VOLATILE: lambda self: self.expression(
+ "VOLATILE": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
),
- TokenType.WITH: lambda self: self._parse_wrapped_csv(self._parse_property),
- TokenType.PROPERTIES: lambda self: self._parse_wrapped_csv(self._parse_property),
+ "WITH": lambda self: self._parse_with_property(),
+ "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
+ "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
+ "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
+ "BEFORE": lambda self: self._parse_journal(
+ no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
+ ),
+ "JOURNAL": lambda self: self._parse_journal(
+ no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
+ ),
+ "AFTER": lambda self: self._parse_afterjournal(
+ no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
+ ),
+ "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
+ "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
+ "CHECKSUM": lambda self: self._parse_checksum(),
+ "FREESPACE": lambda self: self._parse_freespace(),
+ "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
+ no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
+ ),
+ "MIN": lambda self: self._parse_datablocksize(),
+ "MINIMUM": lambda self: self._parse_datablocksize(),
+ "MAX": lambda self: self._parse_datablocksize(),
+ "MAXIMUM": lambda self: self._parse_datablocksize(),
+ "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
+ default=self._prev.text.upper() == "DEFAULT"
+ ),
+ "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
}
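PROPERTY_PARSERS is now keyed by uppercase strings and dispatched on token text (see _parse_property below), which is what lets multi-word, dialect-specific properties such as BEFORE JOURNAL or DATABLOCKSIZE hook in without dedicated token types. A sketch of familiar properties that keep working, hedged since exact coverage is whatever this release's tests exercise (the s3 path is illustrative):

import sqlglot

# LOCATION and STORED no longer need their own TokenType; dispatch is by text.
sql = "CREATE TABLE t (a INT) STORED AS PARQUET LOCATION 's3://bucket/t'"
print(repr(sqlglot.parse_one(sql, read="hive").args.get("properties")))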
CONSTRAINT_PARSERS = {
@@ -580,6 +596,7 @@ class Parser(metaclass=_Parser):
}
QUERY_MODIFIER_PARSERS = {
+ "match": lambda self: self._parse_match_recognize(),
"where": lambda self: self._parse_where(),
"group": lambda self: self._parse_group(),
"having": lambda self: self._parse_having(),
@@ -627,7 +644,6 @@ class Parser(metaclass=_Parser):
"max_errors",
"null_ordering",
"_tokens",
- "_chunks",
"_index",
"_curr",
"_next",
@@ -660,7 +676,6 @@ class Parser(metaclass=_Parser):
self.sql = ""
self.errors = []
self._tokens = []
- self._chunks = [[]]
self._index = 0
self._curr = None
self._next = None
@@ -728,17 +743,18 @@ class Parser(metaclass=_Parser):
self.reset()
self.sql = sql or ""
total = len(raw_tokens)
+ chunks: t.List[t.List[Token]] = [[]]
for i, token in enumerate(raw_tokens):
if token.token_type == TokenType.SEMICOLON:
if i < total - 1:
- self._chunks.append([])
+ chunks.append([])
else:
- self._chunks[-1].append(token)
+ chunks[-1].append(token)
expressions = []
- for tokens in self._chunks:
+ for tokens in chunks:
self._index = -1
self._tokens = tokens
self._advance()
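Dropping self._chunks in favor of a local keeps per-call state off the parser instance; observable behavior is unchanged, as a quick sketch shows:

import sqlglot

# One expression per semicolon-separated statement; the i < total - 1 guard
# means a trailing semicolon does not create an empty chunk.
for tree in sqlglot.parse("SELECT 1; SELECT 2;"):
    print(tree.sql())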
@@ -771,7 +787,7 @@ class Parser(metaclass=_Parser):
error level setting.
"""
token = token or self._curr or self._prev or Token.string("")
- start = self._find_token(token, self.sql)
+ start = self._find_token(token)
end = start + len(token.text)
start_context = self.sql[max(start - self.error_message_context, 0) : start]
highlight = self.sql[start:end]
@@ -833,13 +849,16 @@ class Parser(metaclass=_Parser):
for error_message in expression.error_messages(args):
self.raise_error(error_message)
- def _find_token(self, token: Token, sql: str) -> int:
+ def _find_sql(self, start: Token, end: Token) -> str:
+ return self.sql[self._find_token(start) : self._find_token(end)]
+
+ def _find_token(self, token: Token) -> int:
line = 1
col = 1
index = 0
while line < token.line or col < token.col:
- if Tokenizer.WHITE_SPACE.get(sql[index]) == TokenType.BREAK:
+ if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
line += 1
col = 1
else:
@@ -911,6 +930,10 @@ class Parser(metaclass=_Parser):
def _parse_create(self) -> t.Optional[exp.Expression]:
replace = self._match_pair(TokenType.OR, TokenType.REPLACE)
+ set_ = self._match(TokenType.SET) # Teradata
+ multiset = self._match_text_seq("MULTISET") # Teradata
+ global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY") # Teradata
+ volatile = self._match(TokenType.VOLATILE) # Teradata
temporary = self._match(TokenType.TEMPORARY)
transient = self._match_text_seq("TRANSIENT")
external = self._match_text_seq("EXTERNAL")
@@ -954,10 +977,18 @@ class Parser(metaclass=_Parser):
TokenType.VIEW,
TokenType.SCHEMA,
):
- this = self._parse_table(schema=True)
- properties = self._parse_properties()
- if self._match(TokenType.ALIAS):
- expression = self._parse_ddl_select()
+ table_parts = self._parse_table_parts(schema=True)
+
+ if self._match(TokenType.COMMA): # comma-separated properties before schema definition
+ properties = self._parse_properties(before=True)
+
+ this = self._parse_schema(this=table_parts)
+
+ if not properties: # properties after schema definition
+ properties = self._parse_properties()
+
+ self._match(TokenType.ALIAS)
+ expression = self._parse_ddl_select()
if create_token.token_type == TokenType.TABLE:
if self._match_text_seq("WITH", "DATA"):
@@ -988,6 +1019,10 @@ class Parser(metaclass=_Parser):
this=this,
kind=create_token.text,
expression=expression,
+ set=set_,
+ multiset=multiset,
+ global_temporary=global_temporary,
+ volatile=volatile,
exists=exists,
properties=properties,
temporary=temporary,
@@ -1004,9 +1039,19 @@ class Parser(metaclass=_Parser):
begin=begin,
)
+ def _parse_property_before(self) -> t.Optional[exp.Expression]:
+ self._match_text_seq("NO")
+ self._match_text_seq("DUAL")
+ self._match_text_seq("DEFAULT")
+
+ if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
+ return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
+
+ return None
+
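The SET/MULTISET/GLOBAL TEMPORARY/VOLATILE flags and the before=True property pass target Teradata-style CREATE TABLE, where properties sit comma-separated between the table name and the column list, and _parse_property_before strips a leading NO/DUAL/DEFAULT before dispatching. A hedged sketch against the base parser (table and column names are illustrative):

import sqlglot

# Comma-separated "before" properties: parsed via _parse_properties(before=True).
print(repr(sqlglot.parse_one("CREATE SET TABLE db.t, NO FALLBACK (a INT)")))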
def _parse_property(self) -> t.Optional[exp.Expression]:
- if self._match_set(self.PROPERTY_PARSERS):
- return self.PROPERTY_PARSERS[self._prev.token_type](self)
+ if self._match_texts(self.PROPERTY_PARSERS):
+ return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
return self._parse_character_set(True)
@@ -1033,6 +1078,166 @@ class Parser(metaclass=_Parser):
this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
)
+ def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
+ properties = []
+
+ while True:
+ if before:
+ self._match(TokenType.COMMA)
+ identified_property = self._parse_property_before()
+ else:
+ identified_property = self._parse_property()
+
+ if not identified_property:
+ break
+ for p in ensure_collection(identified_property):
+ properties.append(p)
+
+ if properties:
+ return self.expression(exp.Properties, expressions=properties, before=before)
+
+ return None
+
+ def _parse_fallback(self, no=False) -> exp.Expression:
+ self._match_text_seq("FALLBACK")
+ return self.expression(
+ exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
+ )
+
+ def _parse_with_property(
+ self,
+ ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
+ if self._match(TokenType.L_PAREN, advance=False):
+ return self._parse_wrapped_csv(self._parse_property)
+
+ if not self._next:
+ return None
+
+ if self._next.text.upper() == "JOURNAL":
+ return self._parse_withjournaltable()
+
+ return self._parse_withisolatedloading()
+
+ def _parse_withjournaltable(self) -> exp.Expression:
+ self._match_text_seq("WITH", "JOURNAL", "TABLE")
+ self._match(TokenType.EQ)
+ return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
+
+ def _parse_log(self, no=False) -> exp.Expression:
+ self._match_text_seq("LOG")
+ return self.expression(exp.LogProperty, no=no)
+
+ def _parse_journal(self, no=False, dual=False) -> exp.Expression:
+ before = self._match_text_seq("BEFORE")
+ self._match_text_seq("JOURNAL")
+ return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
+
+ def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
+ self._match_text_seq("NOT")
+ self._match_text_seq("LOCAL")
+ self._match_text_seq("AFTER", "JOURNAL")
+ return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
+
+ def _parse_checksum(self) -> exp.Expression:
+ self._match_text_seq("CHECKSUM")
+ self._match(TokenType.EQ)
+
+ on = None
+ if self._match(TokenType.ON):
+ on = True
+ elif self._match_text_seq("OFF"):
+ on = False
+ default = self._match(TokenType.DEFAULT)
+
+ return self.expression(
+ exp.ChecksumProperty,
+ on=on,
+ default=default,
+ )
+
+ def _parse_freespace(self) -> exp.Expression:
+ self._match_text_seq("FREESPACE")
+ self._match(TokenType.EQ)
+ return self.expression(
+ exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
+ )
+
+ def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
+ self._match_text_seq("MERGEBLOCKRATIO")
+ if self._match(TokenType.EQ):
+ return self.expression(
+ exp.MergeBlockRatioProperty,
+ this=self._parse_number(),
+ percent=self._match(TokenType.PERCENT),
+ )
+ else:
+ return self.expression(
+ exp.MergeBlockRatioProperty,
+ no=no,
+ default=default,
+ )
+
+ def _parse_datablocksize(self, default=None) -> exp.Expression:
+ if default:
+ self._match_text_seq("DATABLOCKSIZE")
+ return self.expression(exp.DataBlocksizeProperty, default=True)
+ elif self._match_texts(("MIN", "MINIMUM")):
+ self._match_text_seq("DATABLOCKSIZE")
+ return self.expression(exp.DataBlocksizeProperty, min=True)
+ elif self._match_texts(("MAX", "MAXIMUM")):
+ self._match_text_seq("DATABLOCKSIZE")
+ return self.expression(exp.DataBlocksizeProperty, min=False)
+
+ self._match_text_seq("DATABLOCKSIZE")
+ self._match(TokenType.EQ)
+ size = self._parse_number()
+ units = None
+ if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
+ units = self._prev.text
+ return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
+
+ def _parse_blockcompression(self) -> exp.Expression:
+ self._match_text_seq("BLOCKCOMPRESSION")
+ self._match(TokenType.EQ)
+ always = self._match(TokenType.ALWAYS)
+ manual = self._match_text_seq("MANUAL")
+ never = self._match_text_seq("NEVER")
+ default = self._match_text_seq("DEFAULT")
+ autotemp = None
+ if self._match_text_seq("AUTOTEMP"):
+ autotemp = self._parse_schema()
+
+ return self.expression(
+ exp.BlockCompressionProperty,
+ always=always,
+ manual=manual,
+ never=never,
+ default=default,
+ autotemp=autotemp,
+ )
+
+ def _parse_withisolatedloading(self) -> exp.Expression:
+ self._match(TokenType.WITH)
+ no = self._match_text_seq("NO")
+ concurrent = self._match_text_seq("CONCURRENT")
+ self._match_text_seq("ISOLATED", "LOADING")
+ for_all = self._match_text_seq("FOR", "ALL")
+ for_insert = self._match_text_seq("FOR", "INSERT")
+ for_none = self._match_text_seq("FOR", "NONE")
+ return self.expression(
+ exp.IsolatedLoadingProperty,
+ no=no,
+ concurrent=concurrent,
+ for_all=for_all,
+ for_insert=for_insert,
+ for_none=for_none,
+ )
+
+ def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
+ if self._match(TokenType.PARTITION_BY):
+ return self._parse_csv(self._parse_conjunction)
+ return []
+
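_parse_partition_by consolidates the PARTITION BY list parsing that was previously inlined in the window-spec parser, and MATCH_RECOGNIZE below reuses it. Sketch:

import sqlglot
from sqlglot import exp

# Window specs now go through the shared helper; partition_by is a list of
# expressions on exp.Window.
tree = sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t")
print(tree.find(exp.Window).args.get("partition_by"))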
def _parse_partitioned_by(self) -> exp.Expression:
self._match(TokenType.EQ)
return self.expression(
@@ -1093,21 +1298,6 @@ class Parser(metaclass=_Parser):
return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
- def _parse_properties(self) -> t.Optional[exp.Expression]:
- properties = []
-
- while True:
- identified_property = self._parse_property()
- if not identified_property:
- break
- for p in ensure_collection(identified_property):
- properties.append(p)
-
- if properties:
- return self.expression(exp.Properties, expressions=properties)
-
- return None
-
def _parse_describe(self) -> exp.Expression:
kind = self._match_set(self.CREATABLES) and self._prev.text
this = self._parse_table()
@@ -1248,11 +1438,9 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.PARTITION):
return None
- def parse_values() -> exp.Property:
- props = self._parse_csv(self._parse_var_or_string, sep=TokenType.EQ)
- return exp.Property(this=seq_get(props, 0), value=seq_get(props, 1))
-
- return self.expression(exp.Partition, this=self._parse_wrapped_csv(parse_values))
+ return self.expression(
+ exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
+ )
def _parse_value(self) -> exp.Expression:
if self._match(TokenType.L_PAREN):
@@ -1360,8 +1548,7 @@ class Parser(metaclass=_Parser):
if not alias or not alias.this:
self.raise_error("Expected CTE to have alias")
- if not self._match(TokenType.ALIAS):
- self.raise_error("Expected AS in CTE")
+ self._match(TokenType.ALIAS)
return self.expression(
exp.CTE,
@@ -1376,10 +1563,11 @@ class Parser(metaclass=_Parser):
alias = self._parse_id_var(
any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
)
+ index = self._index
if self._match(TokenType.L_PAREN):
columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var()))
- self._match_r_paren()
+ if columns:
+     self._match_r_paren()
+ else:
+     self._retreat(index)
else:
columns = None
@@ -1452,6 +1640,87 @@ class Parser(metaclass=_Parser):
exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
)
+ def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
+ if not self._match(TokenType.MATCH_RECOGNIZE):
+ return None
+ self._match_l_paren()
+
+ partition = self._parse_partition_by()
+ order = self._parse_order()
+ measures = (
+ self._parse_alias(self._parse_conjunction())
+ if self._match_text_seq("MEASURES")
+ else None
+ )
+
+ if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
+ rows = exp.Var(this="ONE ROW PER MATCH")
+ elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
+ text = "ALL ROWS PER MATCH"
+ if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
+ text += f" SHOW EMPTY MATCHES"
+ elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
+ text += f" OMIT EMPTY MATCHES"
+ elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
+ text += f" WITH UNMATCHED ROWS"
+ rows = exp.Var(this=text)
+ else:
+ rows = None
+
+ if self._match_text_seq("AFTER", "MATCH", "SKIP"):
+ text = "AFTER MATCH SKIP"
+ if self._match_text_seq("PAST", "LAST", "ROW"):
+ text += f" PAST LAST ROW"
+ elif self._match_text_seq("TO", "NEXT", "ROW"):
+ text += f" TO NEXT ROW"
+ elif self._match_text_seq("TO", "FIRST"):
+ text += f" TO FIRST {self._advance_any().text}" # type: ignore
+ elif self._match_text_seq("TO", "LAST"):
+ text += f" TO LAST {self._advance_any().text}" # type: ignore
+ after = exp.Var(this=text)
+ else:
+ after = None
+
+ if self._match_text_seq("PATTERN"):
+ self._match_l_paren()
+
+ if not self._curr:
+ self.raise_error("Expecting )", self._curr)
+
+ paren = 1
+ start = self._curr
+
+ while self._curr and paren > 0:
+ if self._curr.token_type == TokenType.L_PAREN:
+ paren += 1
+ if self._curr.token_type == TokenType.R_PAREN:
+ paren -= 1
+ self._advance()
+ if paren > 0:
+ self.raise_error("Expecting )", self._curr)
+ if not self._curr:
+ self.raise_error("Expecting pattern", self._curr)
+ end = self._prev
+ pattern = exp.Var(this=self._find_sql(start, end))
+ else:
+ pattern = None
+
+ define = (
+ self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
+ )
+ self._match_r_paren()
+
+ return self.expression(
+ exp.MatchRecognize,
+ partition_by=partition,
+ order=order,
+ measures=measures,
+ rows=rows,
+ after=after,
+ pattern=pattern,
+ define=define,
+ )
+
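For reference, one shape of query the new _parse_match_recognize accepts; PATTERN is captured verbatim by scanning raw tokens until the parens balance and re-slicing the SQL via _find_sql. Hedged sketch: MEASURES and DEFINE here take a single aliased expression, mirroring this release's tests, and the input is illustrative:

import sqlglot

sql = """
SELECT * FROM t MATCH_RECOGNIZE (
  PARTITION BY a ORDER BY b
  MEASURES y AS my_y
  ALL ROWS PER MATCH SHOW EMPTY MATCHES
  AFTER MATCH SKIP PAST LAST ROW
  PATTERN (x+ y)
  DEFINE x AS y
)
"""
print(repr(sqlglot.parse_one(sql)))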
def _parse_lateral(self) -> t.Optional[exp.Expression]:
outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
@@ -1772,12 +2041,19 @@ class Parser(metaclass=_Parser):
if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
return None
+ expressions = self._parse_csv(self._parse_conjunction)
+ grouping_sets = self._parse_grouping_sets()
+
+ with_ = self._match(TokenType.WITH)
+ cube = self._match(TokenType.CUBE) and (with_ or self._parse_wrapped_id_vars())
+ rollup = self._match(TokenType.ROLLUP) and (with_ or self._parse_wrapped_id_vars())
+
return self.expression(
exp.Group,
- expressions=self._parse_csv(self._parse_conjunction),
- grouping_sets=self._parse_grouping_sets(),
- cube=self._match(TokenType.CUBE) and self._parse_wrapped_id_vars(),
- rollup=self._match(TokenType.ROLLUP) and self._parse_wrapped_id_vars(),
+ expressions=expressions,
+ grouping_sets=grouping_sets,
+ cube=cube,
+ rollup=rollup,
)
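GROUP BY now also accepts MySQL's trailing WITH CUBE / WITH ROLLUP; in that form cube/rollup are stored as bare flags rather than wrapped column lists. Sketch:

import sqlglot

# "WITH ROLLUP" should set rollup=True on exp.Group; the parenthesized form
# GROUP BY ROLLUP (a, b) still yields a column list.
print(repr(sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP")))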
def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
@@ -1788,11 +2064,11 @@ class Parser(metaclass=_Parser):
def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
if self._match(TokenType.L_PAREN):
- grouping_set = self._parse_csv(self._parse_id_var)
+ grouping_set = self._parse_csv(self._parse_column)
self._match_r_paren()
return self.expression(exp.Tuple, expressions=grouping_set)
- return self._parse_id_var()
+ return self._parse_column()
def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
if not skip_having_token and not self._match(TokenType.HAVING):
@@ -2268,7 +2544,6 @@ class Parser(metaclass=_Parser):
args = self._parse_csv(self._parse_lambda)
if function:
-
# Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
# second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
if count_params(function) == 2:
@@ -2541,9 +2816,10 @@ class Parser(metaclass=_Parser):
return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
- if not self._match(TokenType.L_BRACKET):
+ if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
return this
+ bracket_kind = self._prev.token_type
expressions: t.List[t.Optional[exp.Expression]]
if self._match(TokenType.COLON):
@@ -2551,14 +2827,19 @@ class Parser(metaclass=_Parser):
else:
expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
- if not this or this.name.upper() == "ARRAY":
+ # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
+ if bracket_kind == TokenType.L_BRACE:
+ this = self.expression(exp.Struct, expressions=expressions)
+ elif not this or this.name.upper() == "ARRAY":
this = self.expression(exp.Array, expressions=expressions)
else:
expressions = apply_index_offset(expressions, -self.index_offset)
this = self.expression(exp.Bracket, this=this, expressions=expressions)
- if not self._match(TokenType.R_BRACKET):
+ if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
self.raise_error("Expected ]")
+ elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
+ self.raise_error("Expected }")
this.comments = self._prev_comments
return self._parse_bracket(this)
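_parse_bracket now also opens on a left brace to build DuckDB struct literals, and the unmatched-delimiter error is keyed to the opening bracket kind. A sketch, assuming the DuckDB dialect surface from this release:

import sqlglot
from sqlglot import exp

# DuckDB struct literal {'x': 1} -> exp.Struct; a missing closing brace now
# raises "Expected }" instead of "Expected ]".
tree = sqlglot.parse_one("SELECT {'x': 1, 'y': 2} AS s", read="duckdb")
print(repr(tree.find(exp.Struct)))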
@@ -2727,7 +3008,7 @@ class Parser(metaclass=_Parser):
position = self._prev.text.upper()
expression = self._parse_term()
- if self._match(TokenType.FROM):
+ if self._match_set((TokenType.FROM, TokenType.COMMA)):
this = self._parse_term()
else:
this = expression
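TRIM now accepts a comma between its operands (Snowflake-style TRIM(expr, characters)) alongside the standard TRIM(characters FROM expr). Sketch:

import sqlglot

# Both spellings should land in exp.Trim; per-dialect generators decide which
# form to emit on the way back out.
print(repr(sqlglot.parse_one("SELECT TRIM('x' FROM col) FROM t")))
print(repr(sqlglot.parse_one("SELECT TRIM(col, 'x') FROM t", read="snowflake")))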
@@ -2792,14 +3073,8 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))
window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
-
- partition = None
- if self._match(TokenType.PARTITION_BY):
- partition = self._parse_csv(self._parse_conjunction)
-
+ partition = self._parse_partition_by()
order = self._parse_order()
-
- spec = None
kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
if kind:
@@ -2816,6 +3091,8 @@ class Parser(metaclass=_Parser):
end=end["value"],
end_side=end["side"],
)
+ else:
+ spec = None
self._match_r_paren()
@@ -3060,6 +3337,12 @@ class Parser(metaclass=_Parser):
def _parse_drop_column(self) -> t.Optional[exp.Expression]:
return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
+ # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
+ def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
+ return self.expression(
+ exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
+ )
+
def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
this = None
kind = self._prev.token_type
@@ -3092,14 +3375,24 @@ class Parser(metaclass=_Parser):
actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None
index = self._index
- if self._match_text_seq("ADD"):
+ if self._match(TokenType.DELETE):
+ actions = [self.expression(exp.Delete, where=self._parse_where())]
+ elif self._match_text_seq("ADD"):
if self._match_set(self.ADD_CONSTRAINT_TOKENS):
actions = self._parse_csv(self._parse_add_constraint)
else:
self._retreat(index)
actions = self._parse_csv(self._parse_add_column)
- elif self._match_text_seq("DROP", advance=False):
- actions = self._parse_csv(self._parse_drop_column)
+ elif self._match_text_seq("DROP"):
+ partition_exists = self._parse_exists()
+
+ if self._match(TokenType.PARTITION, advance=False):
+ actions = self._parse_csv(
+ lambda: self._parse_drop_partition(exists=partition_exists)
+ )
+ else:
+ self._retreat(index)
+ actions = self._parse_csv(self._parse_drop_column)
elif self._match_text_seq("RENAME", "TO"):
actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
elif self._match_text_seq("ALTER"):