summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-11 12:46:06 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-11 12:46:06 +0000
commit: 9dae42c19381cf27dc56bd932aebd780aa66722b (patch)
tree: d70521fc9bf401e225f567a2d6acf1c1e4add2fd /sqlglot/parser.py
parent: Adding upstream version 15.0.0. (diff)
download: sqlglot-9dae42c19381cf27dc56bd932aebd780aa66722b.tar.xz
sqlglot-9dae42c19381cf27dc56bd932aebd780aa66722b.zip
Adding upstream version 15.2.0. upstream/15.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- sqlglot/parser.py 184
1 files changed, 117 insertions, 67 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index e77bb5a..96bd6e3 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -155,6 +155,18 @@ class Parser(metaclass=_Parser):
TokenType.DATETIME,
TokenType.DATETIME64,
TokenType.DATE,
+ TokenType.INT4RANGE,
+ TokenType.INT4MULTIRANGE,
+ TokenType.INT8RANGE,
+ TokenType.INT8MULTIRANGE,
+ TokenType.NUMRANGE,
+ TokenType.NUMMULTIRANGE,
+ TokenType.TSRANGE,
+ TokenType.TSMULTIRANGE,
+ TokenType.TSTZRANGE,
+ TokenType.TSTZMULTIRANGE,
+ TokenType.DATERANGE,
+ TokenType.DATEMULTIRANGE,
TokenType.DECIMAL,
TokenType.BIGDECIMAL,
TokenType.UUID,
@@ -193,6 +205,7 @@ class Parser(metaclass=_Parser):
TokenType.SCHEMA,
TokenType.TABLE,
TokenType.VIEW,
+ TokenType.DICTIONARY,
}
CREATABLES = {
@@ -220,6 +233,7 @@ class Parser(metaclass=_Parser):
TokenType.DELETE,
TokenType.DESC,
TokenType.DESCRIBE,
+ TokenType.DICTIONARY,
TokenType.DIV,
TokenType.END,
TokenType.EXECUTE,
@@ -272,6 +286,7 @@ class Parser(metaclass=_Parser):
TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
TokenType.APPLY,
+ TokenType.ASOF,
TokenType.FULL,
TokenType.LEFT,
TokenType.LOCK,
@@ -375,6 +390,11 @@ class Parser(metaclass=_Parser):
TokenType.EXCEPT,
}
+ JOIN_METHODS = {
+ TokenType.NATURAL,
+ TokenType.ASOF,
+ }
+
JOIN_SIDES = {
TokenType.LEFT,
TokenType.RIGHT,
@@ -465,7 +485,7 @@ class Parser(metaclass=_Parser):
exp.Where: lambda self: self._parse_where(),
exp.Window: lambda self: self._parse_named_window(),
exp.With: lambda self: self._parse_with(),
- "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
+ "JOIN_TYPE": lambda self: self._parse_join_parts(),
}
STATEMENT_PARSERS = {
@@ -580,6 +600,8 @@ class Parser(metaclass=_Parser):
),
"JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
"LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
+ "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
+ "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
"LIKE": lambda self: self._parse_create_like(),
"LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
"LOCK": lambda self: self._parse_locking(),
@@ -594,7 +616,8 @@ class Parser(metaclass=_Parser):
"PARTITION BY": lambda self: self._parse_partitioned_by(),
"PARTITIONED BY": lambda self: self._parse_partitioned_by(),
"PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
- "PRIMARY KEY": lambda self: self._parse_primary_key(),
+ "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
+ "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
"RETURNS": lambda self: self._parse_returns(),
"ROW": lambda self: self._parse_row(),
"ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
@@ -603,6 +626,7 @@ class Parser(metaclass=_Parser):
exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
),
"SORTKEY": lambda self: self._parse_sortkey(),
+ "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
"STABLE": lambda self: self.expression(
exp.StabilityProperty, this=exp.Literal.string("STABLE")
),
@@ -1133,13 +1157,16 @@ class Parser(metaclass=_Parser):
begin = None
clone = None
+ def extend_props(temp_props: t.Optional[exp.Expression]) -> None:
+ nonlocal properties
+ if properties and temp_props:
+ properties.expressions.extend(temp_props.expressions)
+ elif temp_props:
+ properties = temp_props
+
if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
this = self._parse_user_defined_function(kind=create_token.token_type)
- temp_properties = self._parse_properties()
- if properties and temp_properties:
- properties.expressions.extend(temp_properties.expressions)
- elif temp_properties:
- properties = temp_properties
+ extend_props(self._parse_properties())
self._match(TokenType.ALIAS)
begin = self._match(TokenType.BEGIN)
@@ -1154,21 +1181,13 @@ class Parser(metaclass=_Parser):
table_parts = self._parse_table_parts(schema=True)
# exp.Properties.Location.POST_NAME
- if self._match(TokenType.COMMA):
- temp_properties = self._parse_properties(before=True)
- if properties and temp_properties:
- properties.expressions.extend(temp_properties.expressions)
- elif temp_properties:
- properties = temp_properties
+ self._match(TokenType.COMMA)
+ extend_props(self._parse_properties(before=True))
this = self._parse_schema(this=table_parts)
# exp.Properties.Location.POST_SCHEMA and POST_WITH
- temp_properties = self._parse_properties()
- if properties and temp_properties:
- properties.expressions.extend(temp_properties.expressions)
- elif temp_properties:
- properties = temp_properties
+ extend_props(self._parse_properties())
self._match(TokenType.ALIAS)
@@ -1178,11 +1197,7 @@ class Parser(metaclass=_Parser):
or self._match(TokenType.WITH, advance=False)
or self._match(TokenType.L_PAREN, advance=False)
):
- temp_properties = self._parse_properties()
- if properties and temp_properties:
- properties.expressions.extend(temp_properties.expressions)
- elif temp_properties:
- properties = temp_properties
+ extend_props(self._parse_properties())
expression = self._parse_ddl_select()
@@ -1192,11 +1207,7 @@ class Parser(metaclass=_Parser):
index = self._parse_index()
# exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
- temp_properties = self._parse_properties()
- if properties and temp_properties:
- properties.expressions.extend(temp_properties.expressions)
- elif temp_properties:
- properties = temp_properties
+ extend_props(self._parse_properties())
if not index:
break
@@ -1888,8 +1899,16 @@ class Parser(metaclass=_Parser):
this = self._parse_query_modifiers(this)
elif (table or nested) and self._match(TokenType.L_PAREN):
- this = self._parse_table() if table else self._parse_select(nested=True)
- this = self._parse_set_operations(self._parse_query_modifiers(this))
+ if self._match(TokenType.PIVOT):
+ this = self._parse_simplified_pivot()
+ elif self._match(TokenType.FROM):
+ this = exp.select("*").from_(
+ t.cast(exp.From, self._parse_from(skip_from_token=True))
+ )
+ else:
+ this = self._parse_table() if table else self._parse_select(nested=True)
+ this = self._parse_set_operations(self._parse_query_modifiers(this))
+
self._match_r_paren()
# early return so that subquery unions aren't parsed again
@@ -1902,10 +1921,6 @@ class Parser(metaclass=_Parser):
expressions=self._parse_csv(self._parse_value),
alias=self._parse_table_alias(),
)
- elif self._match(TokenType.PIVOT):
- this = self._parse_simplified_pivot()
- elif self._match(TokenType.FROM):
- this = exp.select("*").from_(t.cast(exp.From, self._parse_from(skip_from_token=True)))
else:
this = None
@@ -2154,11 +2169,11 @@ class Parser(metaclass=_Parser):
return expression
- def _parse_join_side_and_kind(
+ def _parse_join_parts(
self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
return (
- self._match(TokenType.NATURAL) and self._prev,
+ self._match_set(self.JOIN_METHODS) and self._prev,
self._match_set(self.JOIN_SIDES) and self._prev,
self._match_set(self.JOIN_KINDS) and self._prev,
)
@@ -2168,14 +2183,14 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Join, this=self._parse_table())
index = self._index
- natural, side, kind = self._parse_join_side_and_kind()
+ method, side, kind = self._parse_join_parts()
hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
join = self._match(TokenType.JOIN)
if not skip_join_token and not join:
self._retreat(index)
kind = None
- natural = None
+ method = None
side = None
outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
@@ -2187,12 +2202,10 @@ class Parser(metaclass=_Parser):
if outer_apply:
side = Token(TokenType.LEFT, "LEFT")
- kwargs: t.Dict[
- str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
- ] = {"this": self._parse_table()}
+ kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}
- if natural:
- kwargs["natural"] = True
+ if method:
+ kwargs["method"] = method.text
if side:
kwargs["side"] = side.text
if kind:
@@ -2205,7 +2218,7 @@ class Parser(metaclass=_Parser):
elif self._match(TokenType.USING):
kwargs["using"] = self._parse_wrapped_id_vars()
- return self.expression(exp.Join, **kwargs) # type: ignore
+ return self.expression(exp.Join, **kwargs)
def _parse_index(
self,
@@ -2886,7 +2899,9 @@ class Parser(metaclass=_Parser):
exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
)
- def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
+ def _parse_types(
+ self, check_func: bool = False, schema: bool = False
+ ) -> t.Optional[exp.Expression]:
index = self._index
prefix = self._match_text_seq("SYSUDTLIB", ".")
@@ -2908,7 +2923,9 @@ class Parser(metaclass=_Parser):
if is_struct:
expressions = self._parse_csv(self._parse_struct_types)
elif nested:
- expressions = self._parse_csv(self._parse_types)
+ expressions = self._parse_csv(
+ lambda: self._parse_types(check_func=check_func, schema=schema)
+ )
else:
expressions = self._parse_csv(self._parse_type_size)
@@ -2943,7 +2960,9 @@ class Parser(metaclass=_Parser):
if is_struct:
expressions = self._parse_csv(self._parse_struct_types)
else:
- expressions = self._parse_csv(self._parse_types)
+ expressions = self._parse_csv(
+ lambda: self._parse_types(check_func=check_func, schema=schema)
+ )
if not self._match(TokenType.GT):
self.raise_error("Expecting >")
@@ -3038,11 +3057,7 @@ class Parser(metaclass=_Parser):
else exp.Literal.string(value)
)
else:
- field = (
- self._parse_star()
- or self._parse_function(anonymous=True)
- or self._parse_id_var()
- )
+ field = self._parse_field(anonymous_func=True)
if isinstance(field, exp.Func):
# bigquery allows function calls like x.y.count(...)
@@ -3113,10 +3128,11 @@ class Parser(metaclass=_Parser):
self,
any_token: bool = False,
tokens: t.Optional[t.Collection[TokenType]] = None,
+ anonymous_func: bool = False,
) -> t.Optional[exp.Expression]:
return (
self._parse_primary()
- or self._parse_function()
+ or self._parse_function(anonymous=anonymous_func)
or self._parse_id_var(any_token=any_token, tokens=tokens)
)
@@ -3270,7 +3286,7 @@ class Parser(metaclass=_Parser):
# column defs are not really columns, they're identifiers
if isinstance(this, exp.Column):
this = this.this
- kind = self._parse_types()
+ kind = self._parse_types(schema=True)
if self._match_text_seq("FOR", "ORDINALITY"):
return self.expression(exp.ColumnDef, this=this, ordinality=True)
@@ -3483,16 +3499,18 @@ class Parser(metaclass=_Parser):
exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore
)
- def _parse_primary_key(self) -> exp.Expression:
+ def _parse_primary_key(
+ self, wrapped_optional: bool = False, in_props: bool = False
+ ) -> exp.Expression:
desc = (
self._match_set((TokenType.ASC, TokenType.DESC))
and self._prev.token_type == TokenType.DESC
)
- if not self._match(TokenType.L_PAREN, advance=False):
+ if not in_props and not self._match(TokenType.L_PAREN, advance=False):
return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
- expressions = self._parse_wrapped_csv(self._parse_field)
+ expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
options = self._parse_key_constraint_options()
return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
@@ -3509,10 +3527,11 @@ class Parser(metaclass=_Parser):
return this
bracket_kind = self._prev.token_type
- expressions: t.List[t.Optional[exp.Expression]]
if self._match(TokenType.COLON):
- expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
+ expressions: t.List[t.Optional[exp.Expression]] = [
+ self.expression(exp.Slice, expression=self._parse_conjunction())
+ ]
else:
expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
@@ -4011,22 +4030,15 @@ class Parser(metaclass=_Parser):
self,
any_token: bool = True,
tokens: t.Optional[t.Collection[TokenType]] = None,
- prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
identifier = self._parse_identifier()
if identifier:
return identifier
- prefix = ""
-
- if prefix_tokens:
- while self._match_set(prefix_tokens):
- prefix += self._prev.text
-
if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
quoted = self._prev.token_type == TokenType.STRING
- return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
+ return exp.Identifier(this=self._prev.text, quoted=quoted)
return None
@@ -4472,6 +4484,44 @@ class Parser(metaclass=_Parser):
size = len(start.text)
return exp.Command(this=text[:size], expression=text[size:])
+ def _parse_dict_property(self, this: str) -> exp.DictProperty:
+ settings = []
+
+ self._match_l_paren()
+ kind = self._parse_id_var()
+
+ if self._match(TokenType.L_PAREN):
+ while True:
+ key = self._parse_id_var()
+ value = self._parse_primary()
+
+ if not key and value is None:
+ break
+ settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
+ self._match(TokenType.R_PAREN)
+
+ self._match_r_paren()
+
+ return self.expression(
+ exp.DictProperty,
+ this=this,
+ kind=kind.this if kind else None,
+ settings=settings,
+ )
+
+ def _parse_dict_range(self, this: str) -> exp.DictRange:
+ self._match_l_paren()
+ has_min = self._match_text_seq("MIN")
+ if has_min:
+ min = self._parse_var() or self._parse_primary()
+ self._match_text_seq("MAX")
+ max = self._parse_var() or self._parse_primary()
+ else:
+ max = self._parse_var() or self._parse_primary()
+ min = exp.Literal.number(0)
+ self._match_r_paren()
+ return self.expression(exp.DictRange, this=this, min=min, max=max)
+
def _find_parser(
self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]: