summaryrefslogtreecommitdiffstats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-08 08:11:50 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-08 08:11:50 +0000
commit8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2 (patch)
tree2e29f131dff77b31e84c957266de8f18655b6f88 /sqlglot/parser.py
parentAdding upstream version 22.2.0. (diff)
downloadsqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.tar.xz
sqlglot-8978da3b39d7ca3cf83ee30fcc63ffe0e5453fb2.zip
Adding upstream version 23.7.0.upstream/23.7.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r--sqlglot/parser.py493
1 files changed, 354 insertions, 139 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 49dac2e..91d8d13 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -15,6 +15,8 @@ if t.TYPE_CHECKING:
from sqlglot._typing import E, Lit
from sqlglot.dialects.dialect import Dialect, DialectType
+ T = t.TypeVar("T")
+
logger = logging.getLogger("sqlglot")
OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]
@@ -119,6 +121,9 @@ class Parser(metaclass=_Parser):
"JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
"LIKE": build_like,
"LOG": build_logarithm,
+ "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
+ "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
+ "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
"TIME_TO_TIME_STR": lambda args: exp.Cast(
this=seq_get(args, 0),
to=exp.DataType(this=exp.DataType.Type.TEXT),
@@ -144,6 +149,7 @@ class Parser(metaclass=_Parser):
STRUCT_TYPE_TOKENS = {
TokenType.NESTED,
+ TokenType.OBJECT,
TokenType.STRUCT,
}
@@ -258,6 +264,7 @@ class Parser(metaclass=_Parser):
TokenType.IPV6,
TokenType.UNKNOWN,
TokenType.NULL,
+ TokenType.NAME,
*ENUM_TYPE_TOKENS,
*NESTED_TYPE_TOKENS,
*AGGREGATE_TYPE_TOKENS,
@@ -291,6 +298,7 @@ class Parser(metaclass=_Parser):
TokenType.VIEW,
TokenType.MODEL,
TokenType.DICTIONARY,
+ TokenType.SEQUENCE,
TokenType.STORAGE_INTEGRATION,
}
@@ -310,6 +318,7 @@ class Parser(metaclass=_Parser):
TokenType.ANTI,
TokenType.APPLY,
TokenType.ASC,
+ TokenType.ASOF,
TokenType.AUTO_INCREMENT,
TokenType.BEGIN,
TokenType.BPCHAR,
@@ -398,6 +407,8 @@ class Parser(metaclass=_Parser):
TokenType.WINDOW,
}
+ ALIAS_TOKENS = ID_VAR_TOKENS
+
COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
@@ -433,6 +444,7 @@ class Parser(metaclass=_Parser):
TokenType.VAR,
TokenType.LEFT,
TokenType.RIGHT,
+ TokenType.SEQUENCE,
TokenType.DATE,
TokenType.DATETIME,
TokenType.TABLE,
@@ -505,8 +517,9 @@ class Parser(metaclass=_Parser):
}
JOIN_METHODS = {
- TokenType.NATURAL,
TokenType.ASOF,
+ TokenType.NATURAL,
+ TokenType.POSITIONAL,
}
JOIN_SIDES = {
@@ -611,8 +624,8 @@ class Parser(metaclass=_Parser):
TokenType.ALTER: lambda self: self._parse_alter(),
TokenType.BEGIN: lambda self: self._parse_transaction(),
TokenType.CACHE: lambda self: self._parse_cache(),
- TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
TokenType.COMMENT: lambda self: self._parse_comment(),
+ TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
TokenType.CREATE: lambda self: self._parse_create(),
TokenType.DELETE: lambda self: self._parse_delete(),
TokenType.DESC: lambda self: self._parse_describe(),
@@ -627,9 +640,9 @@ class Parser(metaclass=_Parser):
TokenType.REFRESH: lambda self: self._parse_refresh(),
TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
TokenType.SET: lambda self: self._parse_set(),
+ TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
TokenType.UNCACHE: lambda self: self._parse_uncache(),
TokenType.UPDATE: lambda self: self._parse_update(),
- TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
TokenType.USE: lambda self: self.expression(
exp.Use,
kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
@@ -714,6 +727,9 @@ class Parser(metaclass=_Parser):
"ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
"AUTO": lambda self: self._parse_auto_property(),
"AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
+ "BACKUP": lambda self: self.expression(
+ exp.BackupProperty, this=self._parse_var(any_token=True)
+ ),
"BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
"CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
"CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
@@ -739,7 +755,9 @@ class Parser(metaclass=_Parser):
"FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
"FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
"FREESPACE": lambda self: self._parse_freespace(),
+ "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
"HEAP": lambda self: self.expression(exp.HeapProperty),
+ "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
"IMMUTABLE": lambda self: self.expression(
exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
),
@@ -782,6 +800,7 @@ class Parser(metaclass=_Parser):
"SETTINGS": lambda self: self.expression(
exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
),
+ "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
"SORTKEY": lambda self: self._parse_sortkey(),
"SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
"STABLE": lambda self: self.expression(
@@ -789,7 +808,7 @@ class Parser(metaclass=_Parser):
),
"STORED": lambda self: self._parse_stored(),
"SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
- "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
+ "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
"TEMP": lambda self: self.expression(exp.TemporaryProperty),
"TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
"TO": lambda self: self._parse_to_table(),
@@ -799,6 +818,7 @@ class Parser(metaclass=_Parser):
),
"TTL": lambda self: self._parse_ttl(),
"USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
+ "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
"VOLATILE": lambda self: self._parse_volatile_property(),
"WITH": lambda self: self._parse_with_property(),
}
@@ -832,6 +852,9 @@ class Parser(metaclass=_Parser):
exp.DefaultColumnConstraint, this=self._parse_bitwise()
),
"ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
+ "EXCLUDE": lambda self: self.expression(
+ exp.ExcludeColumnConstraint, this=self._parse_index_params()
+ ),
"FOREIGN KEY": lambda self: self._parse_foreign_key(),
"FORMAT": lambda self: self.expression(
exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
@@ -858,7 +881,7 @@ class Parser(metaclass=_Parser):
"UNIQUE": lambda self: self._parse_unique(),
"UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
"WITH": lambda self: self.expression(
- exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
+ exp.Properties, expressions=self._parse_wrapped_properties()
),
}
@@ -871,7 +894,15 @@ class Parser(metaclass=_Parser):
"RENAME": lambda self: self._parse_alter_table_rename(),
}
- SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}
+ SCHEMA_UNNAMED_CONSTRAINTS = {
+ "CHECK",
+ "EXCLUDE",
+ "FOREIGN KEY",
+ "LIKE",
+ "PERIOD",
+ "PRIMARY KEY",
+ "UNIQUE",
+ }
NO_PAREN_FUNCTION_PARSERS = {
"ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
@@ -966,18 +997,54 @@ class Parser(metaclass=_Parser):
"READ": ("WRITE", "ONLY"),
}
+ CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
+ ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
+ )
+ CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
+
+ CREATE_SEQUENCE: OPTIONS_TYPE = {
+ "SCALE": ("EXTEND", "NOEXTEND"),
+ "SHARD": ("EXTEND", "NOEXTEND"),
+ "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
+ **dict.fromkeys(
+ (
+ "SESSION",
+ "GLOBAL",
+ "KEEP",
+ "NOKEEP",
+ "ORDER",
+ "NOORDER",
+ "NOCACHE",
+ "CYCLE",
+ "NOCYCLE",
+ "NOMINVALUE",
+ "NOMAXVALUE",
+ "NOSCALE",
+ "NOSHARD",
+ ),
+ tuple(),
+ ),
+ }
+
+ ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}
+
USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())
+ CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
+
INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}
- OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
+ OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}
+
OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
+ VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}
+
WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
@@ -994,6 +1061,8 @@ class Parser(metaclass=_Parser):
UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
+ SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
+
STRICT_CAST = True
PREFIXED_PIVOT_COLUMNS = False
@@ -1033,6 +1102,9 @@ class Parser(metaclass=_Parser):
# Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SUPPORTS_IMPLICIT_UNNEST = False
+ # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
+ INTERVAL_SPANS = True
+
__slots__ = (
"error_level",
"error_message_context",
@@ -1285,6 +1357,27 @@ class Parser(metaclass=_Parser):
exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
)
+ def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
+ """
+ Attempts to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can
+ be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting
+ the parser state accordingly
+ """
+ index = self._index
+ error_level = self.error_level
+
+ self.error_level = ErrorLevel.IMMEDIATE
+ try:
+ this = parse_method()
+ except ParseError:
+ this = None
+ finally:
+ if not this or retreat:
+ self._retreat(index)
+ self.error_level = error_level
+
+ return this
+
def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
start = self._prev
exists = self._parse_exists() if allow_exists else None
@@ -1377,13 +1470,22 @@ class Parser(metaclass=_Parser):
if not kind:
return self._parse_as_command(start)
+ if_exists = exists or self._parse_exists()
+ table = self._parse_table_parts(
+ schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
+ )
+
+ if self._match(TokenType.L_PAREN, advance=False):
+ expressions = self._parse_wrapped_csv(self._parse_types)
+ else:
+ expressions = None
+
return self.expression(
exp.Drop,
comments=start.comments,
- exists=exists or self._parse_exists(),
- this=self._parse_table(
- schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
- ),
+ exists=if_exists,
+ this=table,
+ expressions=expressions,
kind=kind,
temporary=temporary,
materialized=materialized,
@@ -1409,6 +1511,7 @@ class Parser(metaclass=_Parser):
or self._match_pair(TokenType.OR, TokenType.REPLACE)
or self._match_pair(TokenType.OR, TokenType.ALTER)
)
+
unique = self._match(TokenType.UNIQUE)
if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
@@ -1489,7 +1592,11 @@ class Parser(metaclass=_Parser):
# exp.Properties.Location.POST_ALIAS
extend_props(self._parse_properties())
- expression = self._parse_ddl_select()
+ if create_token.token_type == TokenType.SEQUENCE:
+ expression = self._parse_types()
+ extend_props(self._parse_properties())
+ else:
+ expression = self._parse_ddl_select()
if create_token.token_type == TokenType.TABLE:
# exp.Properties.Location.POST_EXPRESSION
@@ -1539,6 +1646,40 @@ class Parser(metaclass=_Parser):
clone=clone,
)
+ def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
+ seq = exp.SequenceProperties()
+
+ options = []
+ index = self._index
+
+ while self._curr:
+ if self._match_text_seq("INCREMENT"):
+ self._match_text_seq("BY")
+ self._match_text_seq("=")
+ seq.set("increment", self._parse_term())
+ elif self._match_text_seq("MINVALUE"):
+ seq.set("minvalue", self._parse_term())
+ elif self._match_text_seq("MAXVALUE"):
+ seq.set("maxvalue", self._parse_term())
+ elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
+ self._match_text_seq("=")
+ seq.set("start", self._parse_term())
+ elif self._match_text_seq("CACHE"):
+ # T-SQL allows empty CACHE which is initialized dynamically
+ seq.set("cache", self._parse_number() or True)
+ elif self._match_text_seq("OWNED", "BY"):
+ # "OWNED BY NONE" is the default
+ seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
+ else:
+ opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
+ if opt:
+ options.append(opt)
+ else:
+ break
+
+ seq.set("options", options if options else None)
+ return None if self._index == index else seq
+
def _parse_property_before(self) -> t.Optional[exp.Expression]:
# only used for teradata currently
self._match(TokenType.COMMA)
@@ -1564,6 +1705,9 @@ class Parser(metaclass=_Parser):
return None
+ def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
+ return self._parse_wrapped_csv(self._parse_property)
+
def _parse_property(self) -> t.Optional[exp.Expression]:
if self._match_texts(self.PROPERTY_PARSERS):
return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
@@ -1582,12 +1726,12 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.EQ):
self._retreat(index)
- return None
+ return self._parse_sequence_properties()
return self.expression(
exp.Property,
this=key.to_dot() if isinstance(key, exp.Column) else key,
- value=self._parse_column() or self._parse_var(any_token=True),
+ value=self._parse_bitwise() or self._parse_var(any_token=True),
)
def _parse_stored(self) -> exp.FileFormatProperty:
@@ -1619,7 +1763,6 @@ class Parser(metaclass=_Parser):
prop = self._parse_property_before()
else:
prop = self._parse_property()
-
if not prop:
break
for p in ensure_list(prop):
@@ -1662,15 +1805,16 @@ class Parser(metaclass=_Parser):
return prop
- def _parse_with_property(
- self,
- ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
+ def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
if self._match(TokenType.L_PAREN, advance=False):
- return self._parse_wrapped_csv(self._parse_property)
+ return self._parse_wrapped_properties()
if self._match_text_seq("JOURNAL"):
return self._parse_withjournaltable()
+ if self._match_texts(self.VIEW_ATTRIBUTES):
+ return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())
+
if self._match_text_seq("DATA"):
return self._parse_withdata(no=False)
elif self._match_text_seq("NO", "DATA"):
@@ -1818,20 +1962,18 @@ class Parser(metaclass=_Parser):
autotemp=autotemp,
)
- def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
+ def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
+ index = self._index
no = self._match_text_seq("NO")
concurrent = self._match_text_seq("CONCURRENT")
- self._match_text_seq("ISOLATED", "LOADING")
- for_all = self._match_text_seq("FOR", "ALL")
- for_insert = self._match_text_seq("FOR", "INSERT")
- for_none = self._match_text_seq("FOR", "NONE")
+
+ if not self._match_text_seq("ISOLATED", "LOADING"):
+ self._retreat(index)
+ return None
+
+ target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
return self.expression(
- exp.IsolatedLoadingProperty,
- no=no,
- concurrent=concurrent,
- for_all=for_all,
- for_insert=for_insert,
- for_none=for_none,
+ exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
)
def _parse_locking(self) -> exp.LockingProperty:
@@ -2046,20 +2188,22 @@ class Parser(metaclass=_Parser):
def _parse_describe(self) -> exp.Describe:
kind = self._match_set(self.CREATABLES) and self._prev.text
- extended = self._match_text_seq("EXTENDED")
+ style = self._match_texts(("EXTENDED", "FORMATTED")) and self._prev.text.upper()
this = self._parse_table(schema=True)
properties = self._parse_properties()
expressions = properties.expressions if properties else None
return self.expression(
- exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
+ exp.Describe, this=this, style=style, kind=kind, expressions=expressions
)
def _parse_insert(self) -> exp.Insert:
comments = ensure_list(self._prev_comments)
+ hint = self._parse_hint()
overwrite = self._match(TokenType.OVERWRITE)
ignore = self._match(TokenType.IGNORE)
local = self._match_text_seq("LOCAL")
alternative = None
+ is_function = None
if self._match_text_seq("DIRECTORY"):
this: t.Optional[exp.Expression] = self.expression(
@@ -2075,13 +2219,17 @@ class Parser(metaclass=_Parser):
self._match(TokenType.INTO)
comments += ensure_list(self._prev_comments)
self._match(TokenType.TABLE)
- this = self._parse_table(schema=True)
+ is_function = self._match(TokenType.FUNCTION)
+
+ this = self._parse_table(schema=True) if not is_function else self._parse_function()
returning = self._parse_returning()
return self.expression(
exp.Insert,
comments=comments,
+ hint=hint,
+ is_function=is_function,
this=this,
by_name=self._match_text_seq("BY", "NAME"),
exists=self._parse_exists(),
@@ -2112,31 +2260,29 @@ class Parser(metaclass=_Parser):
if not conflict and not duplicate:
return None
- nothing = None
- expressions = None
- key = None
+ conflict_keys = None
constraint = None
if conflict:
if self._match_text_seq("ON", "CONSTRAINT"):
constraint = self._parse_id_var()
- else:
- key = self._parse_csv(self._parse_value)
+ elif self._match(TokenType.L_PAREN):
+ conflict_keys = self._parse_csv(self._parse_id_var)
+ self._match_r_paren()
- self._match_text_seq("DO")
- if self._match_text_seq("NOTHING"):
- nothing = True
- else:
- self._match(TokenType.UPDATE)
+ action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
+ if self._prev.token_type == TokenType.UPDATE:
self._match(TokenType.SET)
expressions = self._parse_csv(self._parse_equality)
+ else:
+ expressions = None
return self.expression(
exp.OnConflict,
duplicate=duplicate,
expressions=expressions,
- nothing=nothing,
- key=key,
+ action=action,
+ conflict_keys=conflict_keys,
constraint=constraint,
)
@@ -2166,7 +2312,7 @@ class Parser(metaclass=_Parser):
serde_properties = None
if self._match(TokenType.SERDE_PROPERTIES):
serde_properties = self.expression(
- exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
+ exp.SerdeProperties, expressions=self._parse_wrapped_properties()
)
return self.expression(
@@ -2433,8 +2579,19 @@ class Parser(metaclass=_Parser):
self.raise_error("Expected CTE to have alias")
self._match(TokenType.ALIAS)
+
+ if self._match_text_seq("NOT", "MATERIALIZED"):
+ materialized = False
+ elif self._match_text_seq("MATERIALIZED"):
+ materialized = True
+ else:
+ materialized = None
+
return self.expression(
- exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
+ exp.CTE,
+ this=self._parse_wrapped(self._parse_statement),
+ alias=alias,
+ materialized=materialized,
)
def _parse_table_alias(
@@ -2472,7 +2629,9 @@ class Parser(metaclass=_Parser):
)
def _implicit_unnests_to_explicit(self, this: E) -> E:
- from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm
+ from sqlglot.optimizer.normalize_identifiers import (
+ normalize_identifiers as _norm,
+ )
refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
for i, join in enumerate(this.args.get("joins") or []):
@@ -2502,7 +2661,7 @@ class Parser(metaclass=_Parser):
self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
if isinstance(this, (exp.Query, exp.Table)):
- for join in iter(self._parse_join, None):
+ for join in self._parse_joins():
this.append("joins", join)
for lateral in iter(self._parse_lateral, None):
this.append("laterals", lateral)
@@ -2535,7 +2694,12 @@ class Parser(metaclass=_Parser):
def _parse_hint(self) -> t.Optional[exp.Hint]:
if self._match(TokenType.HINT):
hints = []
- for hint in iter(lambda: self._parse_csv(self._parse_function), []):
+ for hint in iter(
+ lambda: self._parse_csv(
+ lambda: self._parse_function() or self._parse_var(upper=True)
+ ),
+ [],
+ ):
hints.extend(hint)
if not self._match_pair(TokenType.STAR, TokenType.SLASH):
@@ -2743,29 +2907,35 @@ class Parser(metaclass=_Parser):
if hint:
kwargs["hint"] = hint
+ if self._match(TokenType.MATCH_CONDITION):
+ kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)
+
if self._match(TokenType.ON):
kwargs["on"] = self._parse_conjunction()
elif self._match(TokenType.USING):
kwargs["using"] = self._parse_wrapped_id_vars()
- elif not (kind and kind.token_type == TokenType.CROSS):
+ elif not isinstance(kwargs["this"], exp.Unnest) and not (
+ kind and kind.token_type == TokenType.CROSS
+ ):
index = self._index
- join = self._parse_join()
+ joins: t.Optional[list] = list(self._parse_joins())
- if join and self._match(TokenType.ON):
+ if joins and self._match(TokenType.ON):
kwargs["on"] = self._parse_conjunction()
- elif join and self._match(TokenType.USING):
+ elif joins and self._match(TokenType.USING):
kwargs["using"] = self._parse_wrapped_id_vars()
else:
- join = None
+ joins = None
self._retreat(index)
- kwargs["this"].set("joins", [join] if join else None)
+ kwargs["this"].set("joins", joins if joins else None)
comments = [c for token in (method, side, kind) if token for c in token.comments]
return self.expression(exp.Join, comments=comments, **kwargs)
def _parse_opclass(self) -> t.Optional[exp.Expression]:
this = self._parse_conjunction()
+
if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
return this
@@ -2774,6 +2944,35 @@ class Parser(metaclass=_Parser):
return this
+ def _parse_index_params(self) -> exp.IndexParameters:
+ using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
+
+ if self._match(TokenType.L_PAREN, advance=False):
+ columns = self._parse_wrapped_csv(self._parse_with_operator)
+ else:
+ columns = None
+
+ include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
+ partition_by = self._parse_partition_by()
+ with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
+ tablespace = (
+ self._parse_var(any_token=True)
+ if self._match_text_seq("USING", "INDEX", "TABLESPACE")
+ else None
+ )
+ where = self._parse_where()
+
+ return self.expression(
+ exp.IndexParameters,
+ using=using,
+ columns=columns,
+ include=include,
+ partition_by=partition_by,
+ where=where,
+ with_storage=with_storage,
+ tablespace=tablespace,
+ )
+
def _parse_index(
self,
index: t.Optional[exp.Expression] = None,
@@ -2797,27 +2996,16 @@ class Parser(metaclass=_Parser):
index = self._parse_id_var()
table = None
- using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
-
- if self._match(TokenType.L_PAREN, advance=False):
- columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
- else:
- columns = None
-
- include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
+ params = self._parse_index_params()
return self.expression(
exp.Index,
this=index,
table=table,
- using=using,
- columns=columns,
unique=unique,
primary=primary,
amp=amp,
- include=include,
- partition_by=self._parse_partition_by(),
- where=self._parse_where(),
+ params=params,
)
def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
@@ -2977,7 +3165,7 @@ class Parser(metaclass=_Parser):
this = table_sample
if joins:
- for join in iter(self._parse_join, None):
+ for join in self._parse_joins():
this.append("joins", join)
if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
@@ -3126,8 +3314,8 @@ class Parser(metaclass=_Parser):
def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
return list(iter(self._parse_pivot, None)) or None
- def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
- return list(iter(self._parse_join, None)) or None
+ def _parse_joins(self) -> t.Iterator[exp.Join]:
+ return iter(self._parse_join, None)
# https://duckdb.org/docs/sql/statements/pivot
def _parse_simplified_pivot(self) -> exp.Pivot:
@@ -3328,6 +3516,7 @@ class Parser(metaclass=_Parser):
return None
self._match(TokenType.CONNECT_BY)
+ nocycle = self._match_text_seq("NOCYCLE")
self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
exp.Prior, this=self._parse_bitwise()
)
@@ -3337,7 +3526,7 @@ class Parser(metaclass=_Parser):
if not start and self._match(TokenType.START_WITH):
start = self._parse_conjunction()
- return self.expression(exp.Connect, start=start, connect=connect)
+ return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
def _parse_name_as_expression(self) -> exp.Alias:
return self.expression(
@@ -3417,9 +3606,12 @@ class Parser(metaclass=_Parser):
)
def _parse_limit(
- self, this: t.Optional[exp.Expression] = None, top: bool = False
+ self,
+ this: t.Optional[exp.Expression] = None,
+ top: bool = False,
+ skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
- if self._match(TokenType.TOP if top else TokenType.LIMIT):
+ if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
comments = self._prev_comments
if top:
limit_paren = self._match(TokenType.L_PAREN)
@@ -3681,6 +3873,11 @@ class Parser(metaclass=_Parser):
this = exp.Literal.string(parts[0])
unit = self.expression(exp.Var, this=parts[1].upper())
+ if self.INTERVAL_SPANS and self._match_text_seq("TO"):
+ unit = self.expression(
+ exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
+ )
+
return self.expression(exp.Interval, this=this, unit=unit)
def _parse_bitwise(self) -> t.Optional[exp.Expression]:
@@ -3783,6 +3980,9 @@ class Parser(metaclass=_Parser):
if not this:
return None
+ if isinstance(this, exp.Column) and not this.table:
+ this = exp.var(this.name.upper())
+
return self.expression(
exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
)
@@ -3900,19 +4100,14 @@ class Parser(metaclass=_Parser):
elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
maybe_func = False
elif type_token == TokenType.INTERVAL:
- unit = self._parse_var()
-
- if self._match_text_seq("TO"):
- span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
- else:
- span = None
+ unit = self._parse_var(upper=True)
+ if unit:
+ if self._match_text_seq("TO"):
+ unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))
- if span or not unit:
- this = self.expression(
- exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
- )
- else:
this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
+ else:
+ this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
if maybe_func and check_func:
index2 = self._index
@@ -3996,11 +4191,20 @@ class Parser(metaclass=_Parser):
else:
field = self._parse_field(anonymous_func=True, any_token=True)
- if isinstance(field, exp.Func):
+ if isinstance(field, exp.Func) and this:
# bigquery allows function calls like x.y.count(...)
# SAFE.SUBSTR(...)
# https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
- this = self._replace_columns_with_dots(this)
+ this = exp.replace_tree(
+ this,
+ lambda n: (
+ self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
+ if n.table
+ else n.this
+ )
+ if isinstance(n, exp.Column)
+ else n,
+ )
if op:
this = op(self, this, field)
@@ -4050,10 +4254,14 @@ class Parser(metaclass=_Parser):
this = self._parse_set_operations(
self._parse_subquery(this=this, parse_alias=False)
)
+ elif isinstance(this, exp.Subquery):
+ this = self._parse_subquery(
+ this=self._parse_set_operations(this), parse_alias=False
+ )
elif len(expressions) > 1:
this = self.expression(exp.Tuple, expressions=expressions)
else:
- this = self.expression(exp.Paren, this=self._parse_set_operations(this))
+ this = self.expression(exp.Paren, this=this)
if this:
this.add_comments(comments)
@@ -4118,7 +4326,7 @@ class Parser(metaclass=_Parser):
parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
self._advance()
- return parser(self)
+ return self._parse_window(parser(self))
if not self._next or self._next.token_type != TokenType.L_PAREN:
if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
@@ -4186,7 +4394,7 @@ class Parser(metaclass=_Parser):
if not isinstance(e, exp.PropertyEQ):
e = self.expression(
- exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression
+ exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
)
if isinstance(e.this, exp.Column):
@@ -4267,19 +4475,15 @@ class Parser(metaclass=_Parser):
def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
index = self._index
- if not self.errors:
- try:
- if self._parse_select(nested=True):
- return this
- except ParseError:
- pass
- finally:
- self.errors.clear()
- self._retreat(index)
-
if not self._match(TokenType.L_PAREN):
return this
+ # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
+ # expr can be of both types
+ if self._match_set(self.SELECT_START_TOKENS):
+ self._retreat(index)
+ return this
+
args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
self._match_r_paren()
@@ -4300,7 +4504,7 @@ class Parser(metaclass=_Parser):
constraints: t.List[exp.Expression] = []
- if not kind and self._match(TokenType.ALIAS):
+ if (not kind and self._match(TokenType.ALIAS)) or self._match_text_seq("ALIAS"):
constraints.append(
self.expression(
exp.ComputedColumnConstraint,
@@ -4417,9 +4621,7 @@ class Parser(metaclass=_Parser):
self._match_text_seq("LENGTH")
return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
- def _parse_not_constraint(
- self,
- ) -> t.Optional[exp.Expression]:
+ def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
if self._match_text_seq("NULL"):
return self.expression(exp.NotNullColumnConstraint)
if self._match_text_seq("CASESPECIFIC"):
@@ -4447,16 +4649,21 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.CONSTRAINT):
return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
- this = self._parse_id_var()
- expressions = []
+ return self.expression(
+ exp.Constraint,
+ this=self._parse_id_var(),
+ expressions=self._parse_unnamed_constraints(),
+ )
+ def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
+ constraints = []
while True:
constraint = self._parse_unnamed_constraint() or self._parse_function()
if not constraint:
break
- expressions.append(constraint)
+ constraints.append(constraint)
- return self.expression(exp.Constraint, this=this, expressions=expressions)
+ return constraints
def _parse_unnamed_constraint(
self, constraints: t.Optional[t.Collection[str]] = None
@@ -4478,6 +4685,7 @@ class Parser(metaclass=_Parser):
exp.UniqueColumnConstraint,
this=self._parse_schema(self._parse_id_var(any_token=False)),
index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
+ on_conflict=self._parse_on_conflict(),
)
def _parse_key_constraint_options(self) -> t.List[str]:
@@ -4592,7 +4800,7 @@ class Parser(metaclass=_Parser):
def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))
- def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
return this
@@ -4601,9 +4809,9 @@ class Parser(metaclass=_Parser):
lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
)
- if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
+ if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
self.raise_error("Expected ]")
- elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
+ elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
self.raise_error("Expected }")
# https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
@@ -4645,8 +4853,8 @@ class Parser(metaclass=_Parser):
else:
self.raise_error("Expected END after CASE", self._prev)
- return self._parse_window(
- self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
+ return self.expression(
+ exp.Case, comments=comments, this=expression, ifs=ifs, default=default
)
def _parse_if(self) -> t.Optional[exp.Expression]:
@@ -4672,7 +4880,7 @@ class Parser(metaclass=_Parser):
self._match(TokenType.END)
this = self.expression(exp.If, this=condition, true=true, false=false)
- return self._parse_window(this)
+ return this
def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
if not self._match_text_seq("VALUE", "FOR"):
@@ -4739,7 +4947,12 @@ class Parser(metaclass=_Parser):
to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
return self.expression(
- exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
+ exp.Cast if strict else exp.TryCast,
+ this=this,
+ to=to,
+ format=fmt,
+ safe=safe,
+ action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
)
def _parse_string_agg(self) -> exp.Expression:
@@ -5087,6 +5300,9 @@ class Parser(metaclass=_Parser):
def _parse_window(
self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
+ func = this
+ comments = func.comments if isinstance(func, exp.Expression) else None
+
if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
self._match(TokenType.WHERE)
this = self.expression(
@@ -5132,9 +5348,16 @@ class Parser(metaclass=_Parser):
else:
over = self._prev.text.upper()
+ if comments:
+ func.comments = None # type: ignore
+
if not self._match(TokenType.L_PAREN):
return self.expression(
- exp.Window, this=this, alias=self._parse_id_var(False), over=over
+ exp.Window,
+ comments=comments,
+ this=this,
+ alias=self._parse_id_var(False),
+ over=over,
)
window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
@@ -5167,6 +5390,7 @@ class Parser(metaclass=_Parser):
window = self.expression(
exp.Window,
+ comments=comments,
this=this,
partition_by=partition,
order=order,
@@ -5218,7 +5442,7 @@ class Parser(metaclass=_Parser):
self._match_r_paren(aliases)
return aliases
- alias = self._parse_id_var(any_token) or (
+ alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
self.STRING_ALIASES and self._parse_string_as_identifier()
)
@@ -5512,10 +5736,11 @@ class Parser(metaclass=_Parser):
return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
self._match_text_seq("SET", "DATA")
+ self._match_text_seq("TYPE")
return self.expression(
exp.AlterColumn,
this=column,
- dtype=self._match_text_seq("TYPE") and self._parse_types(),
+ dtype=self._parse_types(),
collate=self._match(TokenType.COLLATE) and self._parse_term(),
using=self._match(TokenType.USING) and self._parse_conjunction(),
)
@@ -5919,26 +6144,6 @@ class Parser(metaclass=_Parser):
return True
- @t.overload
- def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...
-
- @t.overload
- def _replace_columns_with_dots(
- self, this: t.Optional[exp.Expression]
- ) -> t.Optional[exp.Expression]: ...
-
- def _replace_columns_with_dots(self, this):
- if isinstance(this, exp.Dot):
- exp.replace_children(this, self._replace_columns_with_dots)
- elif isinstance(this, exp.Column):
- exp.replace_children(this, self._replace_columns_with_dots)
- table = this.args.get("table")
- this = (
- self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
- )
-
- return this
-
def _replace_lambda(
self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
@@ -6011,3 +6216,13 @@ class Parser(metaclass=_Parser):
option=option,
partition=partition,
)
+
+ def _parse_with_operator(self) -> t.Optional[exp.Expression]:
+ this = self._parse_ordered(self._parse_opclass)
+
+ if not self._match(TokenType.WITH):
+ return this
+
+ op = self._parse_var(any_token=True)
+
+ return self.expression(exp.WithOperator, this=this, op=op)