diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-06-11 12:46:06 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-06-11 12:46:06 +0000 |
commit | 9dae42c19381cf27dc56bd932aebd780aa66722b (patch) | |
tree | d70521fc9bf401e225f567a2d6acf1c1e4add2fd /sqlglot/parser.py | |
parent | Adding upstream version 15.0.0. (diff) | |
download | sqlglot-9dae42c19381cf27dc56bd932aebd780aa66722b.tar.xz sqlglot-9dae42c19381cf27dc56bd932aebd780aa66722b.zip |
Adding upstream version 15.2.0. upstream/15.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 184 |
1 file changed, 117 insertions, 67 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index e77bb5a..96bd6e3 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -155,6 +155,18 @@ class Parser(metaclass=_Parser): TokenType.DATETIME, TokenType.DATETIME64, TokenType.DATE, + TokenType.INT4RANGE, + TokenType.INT4MULTIRANGE, + TokenType.INT8RANGE, + TokenType.INT8MULTIRANGE, + TokenType.NUMRANGE, + TokenType.NUMMULTIRANGE, + TokenType.TSRANGE, + TokenType.TSMULTIRANGE, + TokenType.TSTZRANGE, + TokenType.TSTZMULTIRANGE, + TokenType.DATERANGE, + TokenType.DATEMULTIRANGE, TokenType.DECIMAL, TokenType.BIGDECIMAL, TokenType.UUID, @@ -193,6 +205,7 @@ class Parser(metaclass=_Parser): TokenType.SCHEMA, TokenType.TABLE, TokenType.VIEW, + TokenType.DICTIONARY, } CREATABLES = { @@ -220,6 +233,7 @@ class Parser(metaclass=_Parser): TokenType.DELETE, TokenType.DESC, TokenType.DESCRIBE, + TokenType.DICTIONARY, TokenType.DIV, TokenType.END, TokenType.EXECUTE, @@ -272,6 +286,7 @@ class Parser(metaclass=_Parser): TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { TokenType.APPLY, + TokenType.ASOF, TokenType.FULL, TokenType.LEFT, TokenType.LOCK, @@ -375,6 +390,11 @@ class Parser(metaclass=_Parser): TokenType.EXCEPT, } + JOIN_METHODS = { + TokenType.NATURAL, + TokenType.ASOF, + } + JOIN_SIDES = { TokenType.LEFT, TokenType.RIGHT, @@ -465,7 +485,7 @@ class Parser(metaclass=_Parser): exp.Where: lambda self: self._parse_where(), exp.Window: lambda self: self._parse_named_window(), exp.With: lambda self: self._parse_with(), - "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), + "JOIN_TYPE": lambda self: self._parse_join_parts(), } STATEMENT_PARSERS = { @@ -580,6 +600,8 @@ class Parser(metaclass=_Parser): ), "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), + "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), + "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), "LIKE": lambda self: 
self._parse_create_like(), "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), "LOCK": lambda self: self._parse_locking(), @@ -594,7 +616,8 @@ class Parser(metaclass=_Parser): "PARTITION BY": lambda self: self._parse_partitioned_by(), "PARTITIONED BY": lambda self: self._parse_partitioned_by(), "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), - "PRIMARY KEY": lambda self: self._parse_primary_key(), + "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), + "RANGE": lambda self: self._parse_dict_range(this="RANGE"), "RETURNS": lambda self: self._parse_returns(), "ROW": lambda self: self._parse_row(), "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), @@ -603,6 +626,7 @@ class Parser(metaclass=_Parser): exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) ), "SORTKEY": lambda self: self._parse_sortkey(), + "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), "STABLE": lambda self: self.expression( exp.StabilityProperty, this=exp.Literal.string("STABLE") ), @@ -1133,13 +1157,16 @@ class Parser(metaclass=_Parser): begin = None clone = None + def extend_props(temp_props: t.Optional[exp.Expression]) -> None: + nonlocal properties + if properties and temp_props: + properties.expressions.extend(temp_props.expressions) + elif temp_props: + properties = temp_props + if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): this = self._parse_user_defined_function(kind=create_token.token_type) - temp_properties = self._parse_properties() - if properties and temp_properties: - properties.expressions.extend(temp_properties.expressions) - elif temp_properties: - properties = temp_properties + extend_props(self._parse_properties()) self._match(TokenType.ALIAS) begin = self._match(TokenType.BEGIN) @@ -1154,21 +1181,13 @@ class Parser(metaclass=_Parser): table_parts = self._parse_table_parts(schema=True) # exp.Properties.Location.POST_NAME - if 
self._match(TokenType.COMMA): - temp_properties = self._parse_properties(before=True) - if properties and temp_properties: - properties.expressions.extend(temp_properties.expressions) - elif temp_properties: - properties = temp_properties + self._match(TokenType.COMMA) + extend_props(self._parse_properties(before=True)) this = self._parse_schema(this=table_parts) # exp.Properties.Location.POST_SCHEMA and POST_WITH - temp_properties = self._parse_properties() - if properties and temp_properties: - properties.expressions.extend(temp_properties.expressions) - elif temp_properties: - properties = temp_properties + extend_props(self._parse_properties()) self._match(TokenType.ALIAS) @@ -1178,11 +1197,7 @@ class Parser(metaclass=_Parser): or self._match(TokenType.WITH, advance=False) or self._match(TokenType.L_PAREN, advance=False) ): - temp_properties = self._parse_properties() - if properties and temp_properties: - properties.expressions.extend(temp_properties.expressions) - elif temp_properties: - properties = temp_properties + extend_props(self._parse_properties()) expression = self._parse_ddl_select() @@ -1192,11 +1207,7 @@ class Parser(metaclass=_Parser): index = self._parse_index() # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX - temp_properties = self._parse_properties() - if properties and temp_properties: - properties.expressions.extend(temp_properties.expressions) - elif temp_properties: - properties = temp_properties + extend_props(self._parse_properties()) if not index: break @@ -1888,8 +1899,16 @@ class Parser(metaclass=_Parser): this = self._parse_query_modifiers(this) elif (table or nested) and self._match(TokenType.L_PAREN): - this = self._parse_table() if table else self._parse_select(nested=True) - this = self._parse_set_operations(self._parse_query_modifiers(this)) + if self._match(TokenType.PIVOT): + this = self._parse_simplified_pivot() + elif self._match(TokenType.FROM): + this = exp.select("*").from_( + 
t.cast(exp.From, self._parse_from(skip_from_token=True)) + ) + else: + this = self._parse_table() if table else self._parse_select(nested=True) + this = self._parse_set_operations(self._parse_query_modifiers(this)) + self._match_r_paren() # early return so that subquery unions aren't parsed again @@ -1902,10 +1921,6 @@ class Parser(metaclass=_Parser): expressions=self._parse_csv(self._parse_value), alias=self._parse_table_alias(), ) - elif self._match(TokenType.PIVOT): - this = self._parse_simplified_pivot() - elif self._match(TokenType.FROM): - this = exp.select("*").from_(t.cast(exp.From, self._parse_from(skip_from_token=True))) else: this = None @@ -2154,11 +2169,11 @@ class Parser(metaclass=_Parser): return expression - def _parse_join_side_and_kind( + def _parse_join_parts( self, ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: return ( - self._match(TokenType.NATURAL) and self._prev, + self._match_set(self.JOIN_METHODS) and self._prev, self._match_set(self.JOIN_SIDES) and self._prev, self._match_set(self.JOIN_KINDS) and self._prev, ) @@ -2168,14 +2183,14 @@ class Parser(metaclass=_Parser): return self.expression(exp.Join, this=self._parse_table()) index = self._index - natural, side, kind = self._parse_join_side_and_kind() + method, side, kind = self._parse_join_parts() hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None join = self._match(TokenType.JOIN) if not skip_join_token and not join: self._retreat(index) kind = None - natural = None + method = None side = None outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) @@ -2187,12 +2202,10 @@ class Parser(metaclass=_Parser): if outer_apply: side = Token(TokenType.LEFT, "LEFT") - kwargs: t.Dict[ - str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] - ] = {"this": self._parse_table()} + kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} - if natural: - kwargs["natural"] = True + if method: + kwargs["method"] = 
method.text if side: kwargs["side"] = side.text if kind: @@ -2205,7 +2218,7 @@ class Parser(metaclass=_Parser): elif self._match(TokenType.USING): kwargs["using"] = self._parse_wrapped_id_vars() - return self.expression(exp.Join, **kwargs) # type: ignore + return self.expression(exp.Join, **kwargs) def _parse_index( self, @@ -2886,7 +2899,9 @@ class Parser(metaclass=_Parser): exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) ) - def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: + def _parse_types( + self, check_func: bool = False, schema: bool = False + ) -> t.Optional[exp.Expression]: index = self._index prefix = self._match_text_seq("SYSUDTLIB", ".") @@ -2908,7 +2923,9 @@ class Parser(metaclass=_Parser): if is_struct: expressions = self._parse_csv(self._parse_struct_types) elif nested: - expressions = self._parse_csv(self._parse_types) + expressions = self._parse_csv( + lambda: self._parse_types(check_func=check_func, schema=schema) + ) else: expressions = self._parse_csv(self._parse_type_size) @@ -2943,7 +2960,9 @@ class Parser(metaclass=_Parser): if is_struct: expressions = self._parse_csv(self._parse_struct_types) else: - expressions = self._parse_csv(self._parse_types) + expressions = self._parse_csv( + lambda: self._parse_types(check_func=check_func, schema=schema) + ) if not self._match(TokenType.GT): self.raise_error("Expecting >") @@ -3038,11 +3057,7 @@ class Parser(metaclass=_Parser): else exp.Literal.string(value) ) else: - field = ( - self._parse_star() - or self._parse_function(anonymous=True) - or self._parse_id_var() - ) + field = self._parse_field(anonymous_func=True) if isinstance(field, exp.Func): # bigquery allows function calls like x.y.count(...) 
@@ -3113,10 +3128,11 @@ class Parser(metaclass=_Parser): self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None, + anonymous_func: bool = False, ) -> t.Optional[exp.Expression]: return ( self._parse_primary() - or self._parse_function() + or self._parse_function(anonymous=anonymous_func) or self._parse_id_var(any_token=any_token, tokens=tokens) ) @@ -3270,7 +3286,7 @@ class Parser(metaclass=_Parser): # column defs are not really columns, they're identifiers if isinstance(this, exp.Column): this = this.this - kind = self._parse_types() + kind = self._parse_types(schema=True) if self._match_text_seq("FOR", "ORDINALITY"): return self.expression(exp.ColumnDef, this=this, ordinality=True) @@ -3483,16 +3499,18 @@ class Parser(metaclass=_Parser): exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore ) - def _parse_primary_key(self) -> exp.Expression: + def _parse_primary_key( + self, wrapped_optional: bool = False, in_props: bool = False + ) -> exp.Expression: desc = ( self._match_set((TokenType.ASC, TokenType.DESC)) and self._prev.token_type == TokenType.DESC ) - if not self._match(TokenType.L_PAREN, advance=False): + if not in_props and not self._match(TokenType.L_PAREN, advance=False): return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) - expressions = self._parse_wrapped_csv(self._parse_field) + expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) options = self._parse_key_constraint_options() return self.expression(exp.PrimaryKey, expressions=expressions, options=options) @@ -3509,10 +3527,11 @@ class Parser(metaclass=_Parser): return this bracket_kind = self._prev.token_type - expressions: t.List[t.Optional[exp.Expression]] if self._match(TokenType.COLON): - expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] + expressions: t.List[t.Optional[exp.Expression]] = [ + self.expression(exp.Slice, expression=self._parse_conjunction()) + ] 
else: expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) @@ -4011,22 +4030,15 @@ class Parser(metaclass=_Parser): self, any_token: bool = True, tokens: t.Optional[t.Collection[TokenType]] = None, - prefix_tokens: t.Optional[t.Collection[TokenType]] = None, ) -> t.Optional[exp.Expression]: identifier = self._parse_identifier() if identifier: return identifier - prefix = "" - - if prefix_tokens: - while self._match_set(prefix_tokens): - prefix += self._prev.text - if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): quoted = self._prev.token_type == TokenType.STRING - return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) + return exp.Identifier(this=self._prev.text, quoted=quoted) return None @@ -4472,6 +4484,44 @@ class Parser(metaclass=_Parser): size = len(start.text) return exp.Command(this=text[:size], expression=text[size:]) + def _parse_dict_property(self, this: str) -> exp.DictProperty: + settings = [] + + self._match_l_paren() + kind = self._parse_id_var() + + if self._match(TokenType.L_PAREN): + while True: + key = self._parse_id_var() + value = self._parse_primary() + + if not key and value is None: + break + settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) + self._match(TokenType.R_PAREN) + + self._match_r_paren() + + return self.expression( + exp.DictProperty, + this=this, + kind=kind.this if kind else None, + settings=settings, + ) + + def _parse_dict_range(self, this: str) -> exp.DictRange: + self._match_l_paren() + has_min = self._match_text_seq("MIN") + if has_min: + min = self._parse_var() or self._parse_primary() + self._match_text_seq("MAX") + max = self._parse_var() or self._parse_primary() + else: + max = self._parse_var() or self._parse_primary() + min = exp.Literal.number(0) + self._match_r_paren() + return self.expression(exp.DictRange, this=this, min=min, max=max) + def _find_parser( self, parsers: t.Dict[str, t.Callable], trie: 
t.Dict ) -> t.Optional[t.Callable]: |