diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-10-04 12:14:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-10-04 12:14:40 +0000 |
commit | d7f0758e21b5111b5327f3839c5c9f49a04d272b (patch) | |
tree | a425f4ebcc159d6bd9443fe4e0e2f9eb20151027 /sqlglot/parser.py | |
parent | Adding upstream version 18.7.0. (diff) | |
download | sqlglot-d7f0758e21b5111b5327f3839c5c9f49a04d272b.tar.xz sqlglot-d7f0758e21b5111b5327f3839c5c9f49a04d272b.zip |
Adding upstream version 18.11.2. (tag: upstream/18.11.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 87 |
1 file changed, 66 insertions(+), 21 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 84b2639..5e56961 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -178,6 +178,7 @@ class Parser(metaclass=_Parser): TokenType.DATERANGE, TokenType.DATEMULTIRANGE, TokenType.DECIMAL, + TokenType.UDECIMAL, TokenType.BIGDECIMAL, TokenType.UUID, TokenType.GEOGRAPHY, @@ -215,6 +216,7 @@ class Parser(metaclass=_Parser): TokenType.MEDIUMINT: TokenType.UMEDIUMINT, TokenType.SMALLINT: TokenType.USMALLINT, TokenType.TINYINT: TokenType.UTINYINT, + TokenType.DECIMAL: TokenType.UDECIMAL, } SUBQUERY_PREDICATES = { @@ -338,6 +340,7 @@ class Parser(metaclass=_Parser): TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} FUNC_TOKENS = { + TokenType.COLLATE, TokenType.COMMAND, TokenType.CURRENT_DATE, TokenType.CURRENT_DATETIME, @@ -590,6 +593,9 @@ class Parser(metaclass=_Parser): exp.National, this=token.text ), TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), + TokenType.HEREDOC_STRING: lambda self, token: self.expression( + exp.RawString, this=token.text + ), TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), } @@ -666,6 +672,9 @@ class Parser(metaclass=_Parser): "RETURNS": lambda self: self._parse_returns(), "ROW": lambda self: self._parse_row(), "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), + "SAMPLE": lambda self: self.expression( + exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() + ), "SET": lambda self: self.expression(exp.SetProperty, multi=False), "SETTINGS": lambda self: self.expression( exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) @@ -847,8 +856,11 @@ class Parser(metaclass=_Parser): INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} + CLONE_KEYWORDS = {"CLONE", "COPY"} CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} + OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} + TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, 
TokenType.USE} WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} @@ -863,6 +875,8 @@ class Parser(metaclass=_Parser): NULL_TOKENS = {TokenType.NULL} + UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS + STRICT_CAST = True # A NULL arg in CONCAT yields NULL by default @@ -880,9 +894,12 @@ class Parser(metaclass=_Parser): # Whether or not the table sample clause expects CSV syntax TABLESAMPLE_CSV = False - # Whether or not the SET command needs a delimiter (e.g. "=") for assignments. + # Whether or not the SET command needs a delimiter (e.g. "=") for assignments SET_REQUIRES_ASSIGNMENT_DELIMITER = True + # Whether the TRIM function expects the characters to trim as its first argument + TRIM_PATTERN_FIRST = False + __slots__ = ( "error_level", "error_message_context", @@ -1268,6 +1285,7 @@ class Parser(metaclass=_Parser): indexes = None no_schema_binding = None begin = None + end = None clone = None def extend_props(temp_props: t.Optional[exp.Properties]) -> None: @@ -1299,6 +1317,8 @@ class Parser(metaclass=_Parser): else: expression = self._parse_statement() + end = self._match_text_seq("END") + if return_: expression = self.expression(exp.Return, this=expression) elif create_token.token_type == TokenType.INDEX: @@ -1344,7 +1364,8 @@ class Parser(metaclass=_Parser): shallow = self._match_text_seq("SHALLOW") - if self._match_text_seq("CLONE"): + if self._match_texts(self.CLONE_KEYWORDS): + copy = self._prev.text.lower() == "copy" clone = self._parse_table(schema=True) when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() clone_kind = ( @@ -1361,6 +1382,7 @@ class Parser(metaclass=_Parser): kind=clone_kind, shallow=shallow, expression=clone_expression, + copy=copy, ) return self.expression( @@ -1376,6 +1398,7 @@ class Parser(metaclass=_Parser): indexes=indexes, no_schema_binding=no_schema_binding, begin=begin, + end=end, clone=clone, ) @@ -2445,21 +2468,32 @@ class Parser(metaclass=_Parser): kwargs["using"] = 
self._parse_wrapped_id_vars() elif not (kind and kind.token_type == TokenType.CROSS): index = self._index - joins = self._parse_joins() + join = self._parse_join() - if joins and self._match(TokenType.ON): + if join and self._match(TokenType.ON): kwargs["on"] = self._parse_conjunction() - elif joins and self._match(TokenType.USING): + elif join and self._match(TokenType.USING): kwargs["using"] = self._parse_wrapped_id_vars() else: - joins = None + join = None self._retreat(index) - kwargs["this"].set("joins", joins) + kwargs["this"].set("joins", [join] if join else None) comments = [c for token in (method, side, kind) if token for c in token.comments] return self.expression(exp.Join, comments=comments, **kwargs) + def _parse_opclass(self) -> t.Optional[exp.Expression]: + this = self._parse_conjunction() + if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): + return this + + opclass = self._parse_var(any_token=True) + if opclass: + return self.expression(exp.Opclass, this=this, expression=opclass) + + return this + def _parse_index( self, index: t.Optional[exp.Expression] = None, @@ -2486,7 +2520,7 @@ class Parser(metaclass=_Parser): using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None if self._match(TokenType.L_PAREN, advance=False): - columns = self._parse_wrapped_csv(self._parse_ordered) + columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) else: columns = None @@ -2677,7 +2711,9 @@ class Parser(metaclass=_Parser): if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): self._match(TokenType.ALIAS) - offset = self._parse_id_var() or exp.to_identifier("offset") + offset = self._parse_id_var( + any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS + ) or exp.to_identifier("offset") return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) @@ -2715,14 +2751,18 @@ class Parser(metaclass=_Parser): ) method = self._parse_var(tokens=(TokenType.ROW,)) 
- self._match(TokenType.L_PAREN) + matched_l_paren = self._match(TokenType.L_PAREN) if self.TABLESAMPLE_CSV: num = None expressions = self._parse_csv(self._parse_primary) else: expressions = None - num = self._parse_primary() + num = ( + self._parse_factor() + if self._match(TokenType.NUMBER, advance=False) + else self._parse_primary() + ) if self._match_text_seq("BUCKET"): bucket_numerator = self._parse_number() @@ -2737,7 +2777,8 @@ class Parser(metaclass=_Parser): elif num: size = num - self._match(TokenType.R_PAREN) + if matched_l_paren: + self._match_r_paren() if self._match(TokenType.L_PAREN): method = self._parse_var() @@ -2965,8 +3006,8 @@ class Parser(metaclass=_Parser): return None return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) - def _parse_ordered(self) -> exp.Ordered: - this = self._parse_conjunction() + def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: + this = parse_method() if parse_method else self._parse_conjunction() asc = self._match(TokenType.ASC) desc = self._match(TokenType.DESC) or (asc and False) @@ -3144,7 +3185,7 @@ class Parser(metaclass=_Parser): if self._match_text_seq("DISTINCT", "FROM"): klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ - return self.expression(klass, this=this, expression=self._parse_expression()) + return self.expression(klass, this=this, expression=self._parse_conjunction()) expression = self._parse_null() or self._parse_boolean() if not expression: @@ -3760,7 +3801,9 @@ class Parser(metaclass=_Parser): return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) - def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: + def _parse_generated_as_identity( + self, + ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: if self._match_text_seq("BY", "DEFAULT"): on_null = self._match_pair(TokenType.ON, TokenType.NULL) this = self.expression( @@ -4382,16 +4425,18 @@ class 
Parser(metaclass=_Parser): position = None collation = None + expression = None if self._match_texts(self.TRIM_TYPES): position = self._prev.text.upper() - expression = self._parse_bitwise() + this = self._parse_bitwise() if self._match_set((TokenType.FROM, TokenType.COMMA)): - this = self._parse_bitwise() - else: - this = expression - expression = None + invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST + expression = self._parse_bitwise() + + if invert_order: + this, expression = expression, this if self._match(TokenType.COLLATE): collation = self._parse_bitwise() |