From b38d717d5933fdae3fe85c87df7aee9a251fb58e Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 3 Apr 2023 09:31:54 +0200
Subject: Merging upstream version 11.4.5.

Signed-off-by: Daniel Baumann
---
 sqlglot/parser.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 97 insertions(+), 12 deletions(-)

(limited to 'sqlglot/parser.py')

diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index a36251e..8269525 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -19,7 +19,7 @@ from sqlglot.trie import in_trie, new_trie
 logger = logging.getLogger("sqlglot")
 
 
-def parse_var_map(args):
+def parse_var_map(args: t.Sequence) -> exp.Expression:
     keys = []
     values = []
     for i in range(0, len(args), 2):
@@ -31,6 +31,11 @@ def parse_var_map(args):
     )
 
 
+def parse_like(args):
+    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
+    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
+
+
 def binary_range_parser(
     expr_type: t.Type[exp.Expression],
 ) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
@@ -77,6 +82,9 @@ class Parser(metaclass=_Parser):
             this=seq_get(args, 0),
             to=exp.DataType(this=exp.DataType.Type.TEXT),
         ),
+        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
+        "IFNULL": exp.Coalesce.from_arg_list,
+        "LIKE": parse_like,
         "TIME_TO_TIME_STR": lambda args: exp.Cast(
             this=seq_get(args, 0),
             to=exp.DataType(this=exp.DataType.Type.TEXT),
@@ -90,7 +98,6 @@ class Parser(metaclass=_Parser):
             length=exp.Literal.number(10),
         ),
         "VAR_MAP": parse_var_map,
-        "IFNULL": exp.Coalesce.from_arg_list,
     }
 
     NO_PAREN_FUNCTIONS = {
@@ -211,6 +218,7 @@ class Parser(metaclass=_Parser):
         TokenType.FILTER,
         TokenType.FOLLOWING,
         TokenType.FORMAT,
+        TokenType.FULL,
         TokenType.IF,
         TokenType.ISNULL,
         TokenType.INTERVAL,
@@ -226,8 +234,10 @@ class Parser(metaclass=_Parser):
         TokenType.ONLY,
         TokenType.OPTIONS,
         TokenType.ORDINALITY,
+        TokenType.PARTITION,
         TokenType.PERCENT,
         TokenType.PIVOT,
+        TokenType.PRAGMA,
         TokenType.PRECEDING,
         TokenType.RANGE,
         TokenType.REFERENCES,
@@ -257,6 +267,7 @@ class Parser(metaclass=_Parser):
 
     TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
         TokenType.APPLY,
+        TokenType.FULL,
         TokenType.LEFT,
         TokenType.NATURAL,
         TokenType.OFFSET,
@@ -277,6 +288,7 @@ class Parser(metaclass=_Parser):
         TokenType.FILTER,
         TokenType.FIRST,
         TokenType.FORMAT,
+        TokenType.GLOB,
         TokenType.IDENTIFIER,
         TokenType.INDEX,
         TokenType.ISNULL,
@@ -461,6 +473,7 @@ class Parser(metaclass=_Parser):
         TokenType.INSERT: lambda self: self._parse_insert(),
         TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
         TokenType.MERGE: lambda self: self._parse_merge(),
+        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
         TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
         TokenType.SET: lambda self: self._parse_set(),
         TokenType.UNCACHE: lambda self: self._parse_uncache(),
@@ -662,6 +675,8 @@ class Parser(metaclass=_Parser):
         "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
         "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
         "EXTRACT": lambda self: self._parse_extract(),
+        "JSON_OBJECT": lambda self: self._parse_json_object(),
+        "LOG": lambda self: self._parse_logarithm(),
         "POSITION": lambda self: self._parse_position(),
         "STRING_AGG": lambda self: self._parse_string_agg(),
         "SUBSTRING": lambda self: self._parse_substring(),
@@ -719,6 +734,9 @@ class Parser(metaclass=_Parser):
 
     CONVERT_TYPE_FIRST = False
 
+    LOG_BASE_FIRST = True
+    LOG_DEFAULTS_TO_LN = False
+
     __slots__ = (
         "error_level",
         "error_message_context",
@@ -1032,6 +1050,7 @@ class Parser(metaclass=_Parser):
             temporary=temporary,
             materialized=materialized,
             cascade=self._match(TokenType.CASCADE),
+            constraints=self._match_text_seq("CONSTRAINTS"),
         )
 
     def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
@@ -1221,7 +1240,7 @@ class Parser(metaclass=_Parser):
 
             if not identified_property:
                 break
-            for p in ensure_collection(identified_property):
+            for p in ensure_list(identified_property):
                 properties.append(p)
 
         if properties:
@@ -1704,6 +1723,11 @@ class Parser(metaclass=_Parser):
         elif self._match(TokenType.SELECT):
             comments = self._prev_comments
 
+            kind = (
+                self._match(TokenType.ALIAS)
+                and self._match_texts(("STRUCT", "VALUE"))
+                and self._prev.text
+            )
             hint = self._parse_hint()
             all_ = self._match(TokenType.ALL)
             distinct = self._match(TokenType.DISTINCT)
@@ -1722,6 +1746,7 @@ class Parser(metaclass=_Parser):
 
             this = self.expression(
                 exp.Select,
+                kind=kind,
                 hint=hint,
                 distinct=distinct,
                 expressions=expressions,
@@ -2785,7 +2810,6 @@ class Parser(metaclass=_Parser):
 
             this = seq_get(expressions, 0)
             self._parse_query_modifiers(this)
-            self._match_r_paren()
 
             if isinstance(this, exp.Subqueryable):
                 this = self._parse_set_operations(
@@ -2794,7 +2818,9 @@ class Parser(metaclass=_Parser):
             elif len(expressions) > 1:
                 this = self.expression(exp.Tuple, expressions=expressions)
             else:
-                this = self.expression(exp.Paren, this=this)
+                this = self.expression(exp.Paren, this=self._parse_set_operations(this))
+
+            self._match_r_paren()
 
             if this and comments:
                 this.comments = comments
@@ -3318,6 +3344,60 @@ class Parser(metaclass=_Parser):
 
         return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
 
+    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
+        self._match_text_seq("KEY")
+        key = self._parse_field()
+        self._match(TokenType.COLON)
+        self._match_text_seq("VALUE")
+        value = self._parse_field()
+        if not key and not value:
+            return None
+        return self.expression(exp.JSONKeyValue, this=key, expression=value)
+
+    def _parse_json_object(self) -> exp.Expression:
+        expressions = self._parse_csv(self._parse_json_key_value)
+
+        null_handling = None
+        if self._match_text_seq("NULL", "ON", "NULL"):
+            null_handling = "NULL ON NULL"
+        elif self._match_text_seq("ABSENT", "ON", "NULL"):
+            null_handling = "ABSENT ON NULL"
+
+        unique_keys = None
+        if self._match_text_seq("WITH", "UNIQUE"):
+            unique_keys = True
+        elif self._match_text_seq("WITHOUT", "UNIQUE"):
+            unique_keys = False
+
+        self._match_text_seq("KEYS")
+
+        return_type = self._match_text_seq("RETURNING") and self._parse_type()
+        format_json = self._match_text_seq("FORMAT", "JSON")
+        encoding = self._match_text_seq("ENCODING") and self._parse_var()
+
+        return self.expression(
+            exp.JSONObject,
+            expressions=expressions,
+            null_handling=null_handling,
+            unique_keys=unique_keys,
+            return_type=return_type,
+            format_json=format_json,
+            encoding=encoding,
+        )
+
+    def _parse_logarithm(self) -> exp.Expression:
+        # Default argument order is base, expression
+        args = self._parse_csv(self._parse_range)
+
+        if len(args) > 1:
+            if not self.LOG_BASE_FIRST:
+                args.reverse()
+            return exp.Log.from_arg_list(args)
+
+        return self.expression(
+            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
+        )
+
     def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
         args = self._parse_csv(self._parse_bitwise)
 
@@ -3654,7 +3734,7 @@ class Parser(metaclass=_Parser):
         return parse_result
 
     def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
-        return self._parse_select() or self._parse_expression()
+        return self._parse_select() or self._parse_set_operations(self._parse_expression())
 
     def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
         return self._parse_set_operations(
@@ -3741,6 +3821,8 @@ class Parser(metaclass=_Parser):
             expression = self._parse_foreign_key()
         elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
             expression = self._parse_primary_key()
+        else:
+            expression = None
 
         return self.expression(exp.AddConstraint, this=this, expression=expression)
 
@@ -3799,12 +3881,15 @@ class Parser(metaclass=_Parser):
         parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
 
         if parser:
-            return self.expression(
-                exp.AlterTable,
-                this=this,
-                exists=exists,
-                actions=ensure_list(parser(self)),
-            )
+            actions = ensure_list(parser(self))
+
+            if not self._curr:
+                return self.expression(
+                    exp.AlterTable,
+                    this=this,
+                    exists=exists,
+                    actions=actions,
+                )
 
         return self._parse_as_command(start)
 
     def _parse_merge(self) -> exp.Expression:
-- 
cgit v1.2.3
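
Illustrative note, not part of the patch: a minimal sketch of how the new LOG handling above behaves, assuming sqlglot 11.4.5 with its default dialect and the top-level parse_one helper. LOG_BASE_FIRST and LOG_DEFAULTS_TO_LN are the class attributes introduced in this patch; dialects may override them.

    # Sketch only -- exercises the _parse_logarithm() path added in this patch,
    # assuming sqlglot 11.4.5 and its default dialect.
    import sqlglot
    from sqlglot import exp

    # Two arguments: with LOG_BASE_FIRST = True the first argument is kept as the base.
    two_args = sqlglot.parse_one("LOG(10, x)")
    assert isinstance(two_args, exp.Log)

    # One argument: with LOG_DEFAULTS_TO_LN = False this stays an exp.Log node;
    # a dialect that sets it to True would produce exp.Ln instead.
    one_arg = sqlglot.parse_one("LOG(x)")
    assert isinstance(one_arg, exp.Log)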