diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-09-07 11:39:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-09-07 11:39:43 +0000 |
commit | 341eb1a6bdf0dd5b015e5140d3b068c6fd3f4d87 (patch) | |
tree | 61fb7eca2238fb5d41d3906f4af41de03abd25ea /sqlglot/parser.py | |
parent | Adding upstream version 17.12.0. (diff) | |
download | sqlglot-341eb1a6bdf0dd5b015e5140d3b068c6fd3f4d87.tar.xz sqlglot-341eb1a6bdf0dd5b015e5140d3b068c6fd3f4d87.zip |
Adding upstream version 18.2.0.upstream/18.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- | sqlglot/parser.py | 321 |
1 files changed, 250 insertions, 71 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 3db4453..f8690d5 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -136,6 +136,7 @@ class Parser(metaclass=_Parser): TokenType.UINT128, TokenType.INT256, TokenType.UINT256, + TokenType.MEDIUMINT, TokenType.FIXEDSTRING, TokenType.FLOAT, TokenType.DOUBLE, @@ -186,6 +187,7 @@ class Parser(metaclass=_Parser): TokenType.SMALLSERIAL, TokenType.BIGSERIAL, TokenType.XML, + TokenType.YEAR, TokenType.UNIQUEIDENTIFIER, TokenType.USERDEFINED, TokenType.MONEY, @@ -194,9 +196,12 @@ class Parser(metaclass=_Parser): TokenType.IMAGE, TokenType.VARIANT, TokenType.OBJECT, + TokenType.OBJECT_IDENTIFIER, TokenType.INET, TokenType.IPADDRESS, TokenType.IPPREFIX, + TokenType.UNKNOWN, + TokenType.NULL, *ENUM_TYPE_TOKENS, *NESTED_TYPE_TOKENS, } @@ -332,6 +337,7 @@ class Parser(metaclass=_Parser): TokenType.INDEX, TokenType.ISNULL, TokenType.ILIKE, + TokenType.INSERT, TokenType.LIKE, TokenType.MERGE, TokenType.OFFSET, @@ -487,7 +493,7 @@ class Parser(metaclass=_Parser): exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), exp.Column: lambda self: self._parse_column(), exp.Condition: lambda self: self._parse_conjunction(), - exp.DataType: lambda self: self._parse_types(), + exp.DataType: lambda self: self._parse_types(allow_identifiers=False), exp.Expression: lambda self: self._parse_statement(), exp.From: lambda self: self._parse_from(), exp.Group: lambda self: self._parse_group(), @@ -523,9 +529,6 @@ class Parser(metaclass=_Parser): TokenType.DESC: lambda self: self._parse_describe(), TokenType.DESCRIBE: lambda self: self._parse_describe(), TokenType.DROP: lambda self: self._parse_drop(), - TokenType.FROM: lambda self: exp.select("*").from_( - t.cast(exp.From, self._parse_from(skip_from_token=True)) - ), TokenType.INSERT: lambda self: self._parse_insert(), TokenType.LOAD: lambda self: self._parse_load(), TokenType.MERGE: lambda self: self._parse_merge(), @@ -578,7 +581,7 @@ class Parser(metaclass=_Parser): TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), TokenType.PARAMETER: lambda self: self._parse_parameter(), TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) - if self._match_set((TokenType.NUMBER, TokenType.VAR)) + if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) else None, } @@ -593,6 +596,7 @@ class Parser(metaclass=_Parser): TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), TokenType.RLIKE: binary_range_parser(exp.RegexpLike), TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), + TokenType.FOR: lambda self, this: self._parse_comprehension(this), } PROPERTY_PARSERS: t.Dict[str, t.Callable] = { @@ -684,6 +688,12 @@ class Parser(metaclass=_Parser): exp.CommentColumnConstraint, this=self._parse_string() ), "COMPRESS": lambda self: self._parse_compress(), + "CLUSTERED": lambda self: self.expression( + exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) + ), + "NONCLUSTERED": lambda self: self.expression( + exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) + ), "DEFAULT": lambda self: self.expression( exp.DefaultColumnConstraint, this=self._parse_bitwise() ), @@ -698,8 +708,11 @@ class Parser(metaclass=_Parser): "LIKE": lambda self: self._parse_create_like(), "NOT": lambda self: self._parse_not_constraint(), "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), - "ON": lambda self: self._match(TokenType.UPDATE) - and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), + "ON": lambda self: ( + self._match(TokenType.UPDATE) + and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) + ) + or self.expression(exp.OnProperty, this=self._parse_id_var()), "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), "PRIMARY KEY": lambda self: self._parse_primary_key(), "REFERENCES": lambda self: self._parse_references(match=False), @@ -709,6 +722,9 @@ class Parser(metaclass=_Parser): "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), "UNIQUE": lambda self: self._parse_unique(), "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), + "WITH": lambda self: self.expression( + exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) + ), } ALTER_PARSERS = { @@ -728,6 +744,11 @@ class Parser(metaclass=_Parser): "NEXT": lambda self: self._parse_next_value_for(), } + INVALID_FUNC_NAME_TOKENS = { + TokenType.IDENTIFIER, + TokenType.STRING, + } + FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} FUNCTION_PARSERS = { @@ -774,6 +795,8 @@ class Parser(metaclass=_Parser): self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), ), TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), + TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), + TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), } SET_PARSERS = { @@ -815,6 +838,8 @@ class Parser(metaclass=_Parser): ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} + DISTINCT_TOKENS = {TokenType.DISTINCT} + STRICT_CAST = True # A NULL arg in CONCAT yields NULL by default @@ -826,6 +851,11 @@ class Parser(metaclass=_Parser): LOG_BASE_FIRST = True LOG_DEFAULTS_TO_LN = False + SUPPORTS_USER_DEFINED_TYPES = True + + # Whether or not ADD is present for each column added by ALTER TABLE + ALTER_TABLE_ADD_COLUMN_KEYWORD = True + __slots__ = ( "error_level", "error_message_context", @@ -838,9 +868,11 @@ class Parser(metaclass=_Parser): "_next", "_prev", "_prev_comments", + "_tokenizer", ) # Autofilled + TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer INDEX_OFFSET: int = 0 UNNEST_COLUMN_ONLY: bool = False ALIAS_POST_TABLESAMPLE: bool = False @@ -863,6 +895,7 @@ class Parser(metaclass=_Parser): self.error_level = error_level or ErrorLevel.IMMEDIATE self.error_message_context = error_message_context self.max_errors = max_errors + self._tokenizer = self.TOKENIZER_CLASS() self.reset() def reset(self): @@ -1148,7 +1181,7 @@ class Parser(metaclass=_Parser): expression = self._parse_set_operations(expression) if expression else self._parse_select() return self._parse_query_modifiers(expression) - def _parse_drop(self) -> exp.Drop | exp.Command: + def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: start = self._prev temporary = self._match(TokenType.TEMPORARY) materialized = self._match_text_seq("MATERIALIZED") @@ -1160,7 +1193,7 @@ class Parser(metaclass=_Parser): return self.expression( exp.Drop, comments=start.comments, - exists=self._parse_exists(), + exists=exists or self._parse_exists(), this=self._parse_table(schema=True), kind=kind, temporary=temporary, @@ -1274,6 +1307,8 @@ class Parser(metaclass=_Parser): if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): no_schema_binding = True + shallow = self._match_text_seq("SHALLOW") + if self._match_text_seq("CLONE"): clone = self._parse_table(schema=True) when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() @@ -1285,7 +1320,12 @@ class Parser(metaclass=_Parser): clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() self._match(TokenType.R_PAREN) clone = self.expression( - exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression + exp.Clone, + this=clone, + when=when, + kind=clone_kind, + shallow=shallow, + expression=clone_expression, ) return self.expression( @@ -1349,7 +1389,11 @@ class Parser(metaclass=_Parser): if assignment: key = self._parse_var_or_string() self._match(TokenType.EQ) - return self.expression(exp.Property, this=key, value=self._parse_column()) + return self.expression( + exp.Property, + this=key, + value=self._parse_column() or self._parse_var(any_token=True), + ) return None @@ -1409,7 +1453,7 @@ class Parser(metaclass=_Parser): def _parse_with_property( self, - ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: + ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: if self._match(TokenType.L_PAREN, advance=False): return self._parse_wrapped_csv(self._parse_property) @@ -1622,7 +1666,7 @@ class Parser(metaclass=_Parser): override=override, ) - def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: + def _parse_partition_by(self) -> t.List[exp.Expression]: if self._match(TokenType.PARTITION_BY): return self._parse_csv(self._parse_conjunction) return [] @@ -1652,9 +1696,9 @@ class Parser(metaclass=_Parser): def _parse_on_property(self) -> t.Optional[exp.Expression]: if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): return exp.OnCommitProperty() - elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): + if self._match_text_seq("COMMIT", "DELETE", "ROWS"): return exp.OnCommitProperty(delete=True) - return None + return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) def _parse_distkey(self) -> exp.DistKeyProperty: return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) @@ -1709,8 +1753,10 @@ class Parser(metaclass=_Parser): def _parse_describe(self) -> exp.Describe: kind = self._match_set(self.CREATABLES) and self._prev.text - this = self._parse_table() - return self.expression(exp.Describe, this=this, kind=kind) + this = self._parse_table(schema=True) + properties = self._parse_properties() + expressions = properties.expressions if properties else None + return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) def _parse_insert(self) -> exp.Insert: comments = ensure_list(self._prev_comments) @@ -1741,6 +1787,7 @@ class Parser(metaclass=_Parser): exp.Insert, comments=comments, this=this, + by_name=self._match_text_seq("BY", "NAME"), exists=self._parse_exists(), partition=self._parse_partition(), where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) @@ -1895,6 +1942,7 @@ class Parser(metaclass=_Parser): "from": self._parse_from(joins=True), "where": self._parse_where(), "returning": returning or self._parse_returning(), + "order": self._parse_order(), "limit": self._parse_limit(), }, ) @@ -1948,13 +1996,14 @@ class Parser(metaclass=_Parser): # https://prestodb.io/docs/current/sql/values.html return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) - def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: + def _parse_projections(self) -> t.List[exp.Expression]: return self._parse_expressions() def _parse_select( self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True ) -> t.Optional[exp.Expression]: cte = self._parse_with() + if cte: this = self._parse_statement() @@ -1967,12 +2016,18 @@ class Parser(metaclass=_Parser): else: self.raise_error(f"{this.key} does not support CTE") this = cte - elif self._match(TokenType.SELECT): + + return this + + # duckdb supports leading with FROM x + from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None + + if self._match(TokenType.SELECT): comments = self._prev_comments hint = self._parse_hint() all_ = self._match(TokenType.ALL) - distinct = self._match(TokenType.DISTINCT) + distinct = self._match_set(self.DISTINCT_TOKENS) kind = ( self._match(TokenType.ALIAS) @@ -2006,7 +2061,9 @@ class Parser(metaclass=_Parser): if into: this.set("into", into) - from_ = self._parse_from() + if not from_: + from_ = self._parse_from() + if from_: this.set("from", from_) @@ -2033,6 +2090,8 @@ class Parser(metaclass=_Parser): expressions=self._parse_csv(self._parse_value), alias=self._parse_table_alias(), ) + elif from_: + this = exp.select("*").from_(from_.this, copy=False) else: this = None @@ -2491,6 +2550,11 @@ class Parser(metaclass=_Parser): if schema: return self._parse_schema(this=this) + version = self._parse_version() + + if version: + this.set("version", version) + if self.ALIAS_POST_TABLESAMPLE: table_sample = self._parse_table_sample() @@ -2498,11 +2562,11 @@ class Parser(metaclass=_Parser): if alias: this.set("alias", alias) + this.set("hints", self._parse_table_hints()) + if not this.args.get("pivots"): this.set("pivots", self._parse_pivots()) - this.set("hints", self._parse_table_hints()) - if not self.ALIAS_POST_TABLESAMPLE: table_sample = self._parse_table_sample() @@ -2516,6 +2580,37 @@ class Parser(metaclass=_Parser): return this + def _parse_version(self) -> t.Optional[exp.Version]: + if self._match(TokenType.TIMESTAMP_SNAPSHOT): + this = "TIMESTAMP" + elif self._match(TokenType.VERSION_SNAPSHOT): + this = "VERSION" + else: + return None + + if self._match_set((TokenType.FROM, TokenType.BETWEEN)): + kind = self._prev.text.upper() + start = self._parse_bitwise() + self._match_texts(("TO", "AND")) + end = self._parse_bitwise() + expression: t.Optional[exp.Expression] = self.expression( + exp.Tuple, expressions=[start, end] + ) + elif self._match_text_seq("CONTAINED", "IN"): + kind = "CONTAINED IN" + expression = self.expression( + exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) + ) + elif self._match(TokenType.ALL): + kind = "ALL" + expression = None + else: + self._match_text_seq("AS", "OF") + kind = "AS OF" + expression = self._parse_type() + + return self.expression(exp.Version, this=this, expression=expression, kind=kind) + def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: if not self._match(TokenType.UNNEST): return None @@ -2760,7 +2855,7 @@ class Parser(metaclass=_Parser): return self.expression(exp.Group, **elements) # type: ignore - def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: + def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: if not self._match(TokenType.GROUPING_SETS): return None @@ -2784,6 +2879,22 @@ class Parser(metaclass=_Parser): return None return self.expression(exp.Qualify, this=self._parse_conjunction()) + def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: + if skip_start_token: + start = None + elif self._match(TokenType.START_WITH): + start = self._parse_conjunction() + else: + return None + + self._match(TokenType.CONNECT_BY) + self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( + exp.Prior, this=self._parse_bitwise() + ) + connect = self._parse_conjunction() + self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") + return self.expression(exp.Connect, start=start, connect=connect) + def _parse_order( self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False ) -> t.Optional[exp.Expression]: @@ -2929,6 +3040,7 @@ class Parser(metaclass=_Parser): expression, this=this, distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), + by_name=self._match_text_seq("BY", "NAME"), expression=self._parse_set_operations(self._parse_select(nested=True)), ) @@ -3017,6 +3129,8 @@ class Parser(metaclass=_Parser): return self.expression(exp.Escape, this=this, expression=self._parse_string()) def _parse_interval(self) -> t.Optional[exp.Interval]: + index = self._index + if not self._match(TokenType.INTERVAL): return None @@ -3025,7 +3139,11 @@ class Parser(metaclass=_Parser): else: this = self._parse_term() - unit = self._parse_function() or self._parse_var() + if not this: + self._retreat(index) + return None + + unit = self._parse_function() or self._parse_var(any_token=True) # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse # each INTERVAL expression into this canonical form so it's easy to transpile @@ -3036,12 +3154,12 @@ class Parser(metaclass=_Parser): if len(parts) == 2: if unit: - # this is not actually a unit, it's something else + # This is not actually a unit, it's something else (e.g. a "window side") unit = None self._retreat(self._index - 1) - else: - this = exp.Literal.string(parts[0]) - unit = self.expression(exp.Var, this=parts[1]) + + this = exp.Literal.string(parts[0]) + unit = self.expression(exp.Var, this=parts[1]) return self.expression(exp.Interval, this=this, unit=unit) @@ -3087,7 +3205,7 @@ class Parser(metaclass=_Parser): return interval index = self._index - data_type = self._parse_types(check_func=True) + data_type = self._parse_types(check_func=True, allow_identifiers=False) this = self._parse_column() if data_type: @@ -3103,30 +3221,50 @@ class Parser(metaclass=_Parser): return this - def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: + def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: this = self._parse_type() if not this: return None return self.expression( - exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) + exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) ) def _parse_types( - self, check_func: bool = False, schema: bool = False + self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True ) -> t.Optional[exp.Expression]: index = self._index prefix = self._match_text_seq("SYSUDTLIB", ".") if not self._match_set(self.TYPE_TOKENS): - return None + identifier = allow_identifiers and self._parse_id_var( + any_token=False, tokens=(TokenType.VAR,) + ) + + if identifier: + tokens = self._tokenizer.tokenize(identifier.name) + + if len(tokens) != 1: + self.raise_error("Unexpected identifier", self._prev) + + if tokens[0].token_type in self.TYPE_TOKENS: + self._prev = tokens[0] + elif self.SUPPORTS_USER_DEFINED_TYPES: + return identifier + else: + return None + else: + return None type_token = self._prev.token_type if type_token == TokenType.PSEUDO_TYPE: return self.expression(exp.PseudoType, this=self._prev.text) + if type_token == TokenType.OBJECT_IDENTIFIER: + return self.expression(exp.ObjectIdentifier, this=self._prev.text) + nested = type_token in self.NESTED_TYPE_TOKENS is_struct = type_token in self.STRUCT_TYPE_TOKENS expressions = None @@ -3137,7 +3275,9 @@ class Parser(metaclass=_Parser): expressions = self._parse_csv(self._parse_struct_types) elif nested: expressions = self._parse_csv( - lambda: self._parse_types(check_func=check_func, schema=schema) + lambda: self._parse_types( + check_func=check_func, schema=schema, allow_identifiers=allow_identifiers + ) ) elif type_token in self.ENUM_TYPE_TOKENS: expressions = self._parse_csv(self._parse_equality) @@ -3151,14 +3291,16 @@ class Parser(metaclass=_Parser): maybe_func = True this: t.Optional[exp.Expression] = None - values: t.Optional[t.List[t.Optional[exp.Expression]]] = None + values: t.Optional[t.List[exp.Expression]] = None if nested and self._match(TokenType.LT): if is_struct: expressions = self._parse_csv(self._parse_struct_types) else: expressions = self._parse_csv( - lambda: self._parse_types(check_func=check_func, schema=schema) + lambda: self._parse_types( + check_func=check_func, schema=schema, allow_identifiers=allow_identifiers + ) ) if not self._match(TokenType.GT): @@ -3355,7 +3497,7 @@ class Parser(metaclass=_Parser): upper = this.upper() parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) - if optional_parens and parser: + if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: self._advance() return parser(self) @@ -3442,7 +3584,9 @@ class Parser(metaclass=_Parser): index = self._index if self._match(TokenType.L_PAREN): - expressions = self._parse_csv(self._parse_id_var) + expressions = t.cast( + t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) + ) if not self._match(TokenType.R_PAREN): self._retreat(index) @@ -3481,14 +3625,14 @@ class Parser(metaclass=_Parser): if not self._match(TokenType.L_PAREN): return this - args = self._parse_csv( - lambda: self._parse_constraint() - or self._parse_column_def(self._parse_field(any_token=True)) - ) + args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) self._match_r_paren() return self.expression(exp.Schema, this=this, expressions=args) + def _parse_field_def(self) -> t.Optional[exp.Expression]: + return self._parse_column_def(self._parse_field(any_token=True)) + def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: # column defs are not really columns, they're identifiers if isinstance(this, exp.Column): @@ -3499,7 +3643,18 @@ class Parser(metaclass=_Parser): if self._match_text_seq("FOR", "ORDINALITY"): return self.expression(exp.ColumnDef, this=this, ordinality=True) - constraints = [] + constraints: t.List[exp.Expression] = [] + + if not kind and self._match(TokenType.ALIAS): + constraints.append( + self.expression( + exp.ComputedColumnConstraint, + this=self._parse_conjunction(), + persisted=self._match_text_seq("PERSISTED"), + not_null=self._match_pair(TokenType.NOT, TokenType.NULL), + ) + ) + while True: constraint = self._parse_column_constraint() if not constraint: @@ -3553,7 +3708,7 @@ class Parser(metaclass=_Parser): identity = self._match_text_seq("IDENTITY") if self._match(TokenType.L_PAREN): - if self._match_text_seq("START", "WITH"): + if self._match(TokenType.START_WITH): this.set("start", self._parse_bitwise()) if self._match_text_seq("INCREMENT", "BY"): this.set("increment", self._parse_bitwise()) @@ -3580,11 +3735,13 @@ class Parser(metaclass=_Parser): def _parse_not_constraint( self, - ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: + ) -> t.Optional[exp.Expression]: if self._match_text_seq("NULL"): return self.expression(exp.NotNullColumnConstraint) if self._match_text_seq("CASESPECIFIC"): return self.expression(exp.CaseSpecificColumnConstraint, not_=True) + if self._match_text_seq("FOR", "REPLICATION"): + return self.expression(exp.NotForReplicationColumnConstraint) return None def _parse_column_constraint(self) -> t.Optional[exp.Expression]: @@ -3729,7 +3886,7 @@ class Parser(metaclass=_Parser): bracket_kind = self._prev.token_type if self._match(TokenType.COLON): - expressions: t.List[t.Optional[exp.Expression]] = [ + expressions: t.List[exp.Expression] = [ self.expression(exp.Slice, expression=self._parse_conjunction()) ] else: @@ -3844,17 +4001,17 @@ class Parser(metaclass=_Parser): if not self._match(TokenType.ALIAS): if self._match(TokenType.COMMA): - return self.expression( - exp.CastToStrType, this=this, expression=self._parse_string() - ) - else: - self.raise_error("Expected AS after CAST") + return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) + + self.raise_error("Expected AS after CAST") fmt = None to = self._parse_types() if not to: self.raise_error("Expected TYPE after CAST") + elif isinstance(to, exp.Identifier): + to = exp.DataType.build(to.name, udt=True) elif to.this == exp.DataType.Type.CHAR: if self._match(TokenType.CHARACTER_SET): to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) @@ -3908,7 +4065,7 @@ class Parser(metaclass=_Parser): if self._match(TokenType.COMMA): args.extend(self._parse_csv(self._parse_conjunction)) else: - args = self._parse_csv(self._parse_conjunction) + args = self._parse_csv(self._parse_conjunction) # type: ignore index = self._index if not self._match(TokenType.R_PAREN) and args: @@ -3991,10 +4148,10 @@ class Parser(metaclass=_Parser): def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: self._match_text_seq("KEY") - key = self._parse_field() - self._match(TokenType.COLON) + key = self._parse_column() + self._match_set((TokenType.COLON, TokenType.COMMA)) self._match_text_seq("VALUE") - value = self._parse_field() + value = self._parse_bitwise() if not key and not value: return None @@ -4116,7 +4273,7 @@ class Parser(metaclass=_Parser): # Postgres supports the form: substring(string [from int] [for int]) # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 - args = self._parse_csv(self._parse_bitwise) + args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) if self._match(TokenType.FROM): args.append(self._parse_bitwise()) @@ -4149,7 +4306,7 @@ class Parser(metaclass=_Parser): exp.Trim, this=this, position=position, expression=expression, collation=collation ) - def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: + def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) def _parse_named_window(self) -> t.Optional[exp.Expression]: @@ -4216,8 +4373,7 @@ class Parser(metaclass=_Parser): if self._match_text_seq("LAST"): first = False - partition = self._parse_partition_by() - order = self._parse_order() + partition, order = self._parse_partition_and_order() kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text if kind: @@ -4256,6 +4412,11 @@ class Parser(metaclass=_Parser): return window + def _parse_partition_and_order( + self, + ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: + return self._parse_partition_by(), self._parse_order() + def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: self._match(TokenType.BETWEEN) @@ -4377,14 +4538,14 @@ class Parser(metaclass=_Parser): self._advance(-1) return None - def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: + def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: if not self._match(TokenType.EXCEPT): return None if self._match(TokenType.L_PAREN, advance=False): return self._parse_wrapped_csv(self._parse_column) return self._parse_csv(self._parse_column) - def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: + def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: if not self._match(TokenType.REPLACE): return None if self._match(TokenType.L_PAREN, advance=False): @@ -4393,7 +4554,7 @@ class Parser(metaclass=_Parser): def _parse_csv( self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA - ) -> t.List[t.Optional[exp.Expression]]: + ) -> t.List[exp.Expression]: parse_result = parse_method() items = [parse_result] if parse_result is not None else [] @@ -4420,12 +4581,12 @@ class Parser(metaclass=_Parser): return this - def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: + def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: return self._parse_wrapped_csv(self._parse_id_var, optional=optional) def _parse_wrapped_csv( self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False - ) -> t.List[t.Optional[exp.Expression]]: + ) -> t.List[exp.Expression]: return self._parse_wrapped( lambda: self._parse_csv(parse_method, sep=sep), optional=optional ) @@ -4439,7 +4600,7 @@ class Parser(metaclass=_Parser): self._match_r_paren() return parse_result - def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: + def _parse_expressions(self) -> t.List[exp.Expression]: return self._parse_csv(self._parse_expression) def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: @@ -4498,7 +4659,7 @@ class Parser(metaclass=_Parser): self._match(TokenType.COLUMN) exists_column = self._parse_exists(not_=True) - expression = self._parse_column_def(self._parse_field(any_token=True)) + expression = self._parse_field_def() if expression: expression.set("exists", exists_column) @@ -4549,13 +4710,16 @@ class Parser(metaclass=_Parser): return self.expression(exp.AddConstraint, this=this, expression=expression) - def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: + def _parse_alter_table_add(self) -> t.List[exp.Expression]: index = self._index - 1 if self._match_set(self.ADD_CONSTRAINT_TOKENS): return self._parse_csv(self._parse_add_constraint) self._retreat(index) + if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): + return self._parse_csv(self._parse_field_def) + return self._parse_csv(self._parse_add_column) def _parse_alter_table_alter(self) -> exp.AlterColumn: @@ -4576,7 +4740,7 @@ class Parser(metaclass=_Parser): using=self._match(TokenType.USING) and self._parse_conjunction(), ) - def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: + def _parse_alter_table_drop(self) -> t.List[exp.Expression]: index = self._index - 1 partition_exists = self._parse_exists() @@ -4619,6 +4783,9 @@ class Parser(metaclass=_Parser): self._match(TokenType.INTO) target = self._parse_table() + if target and self._match(TokenType.ALIAS, advance=False): + target.set("alias", self._parse_table_alias()) + self._match(TokenType.USING) using = self._parse_table() @@ -4685,8 +4852,7 @@ class Parser(metaclass=_Parser): parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) if parser: return parser(self) - self._advance() - return self.expression(exp.Show, this=self._prev.text.upper()) + return self._parse_as_command(self._prev) def _parse_set_item_assignment( self, kind: t.Optional[str] = None @@ -4786,6 +4952,19 @@ class Parser(metaclass=_Parser): self._match_r_paren() return self.expression(exp.DictRange, this=this, min=min, max=max) + def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension: + expression = self._parse_column() + self._match(TokenType.IN) + iterator = self._parse_column() + condition = self._parse_conjunction() if self._match_text_seq("IF") else None + return self.expression( + exp.Comprehension, + this=this, + expression=expression, + iterator=iterator, + condition=condition, + ) + def _find_parser( self, parsers: t.Dict[str, t.Callable], trie: t.Dict ) -> t.Optional[t.Callable]: |