summary refs log tree commit diff stats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2023-09-07 11:39:43 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2023-09-07 11:39:43 +0000
commit 341eb1a6bdf0dd5b015e5140d3b068c6fd3f4d87 (patch)
tree 61fb7eca2238fb5d41d3906f4af41de03abd25ea /sqlglot/parser.py
parent Adding upstream version 17.12.0. (diff)
download sqlglot-341eb1a6bdf0dd5b015e5140d3b068c6fd3f4d87.tar.xz
sqlglot-341eb1a6bdf0dd5b015e5140d3b068c6fd3f4d87.zip
Adding upstream version 18.2.0. upstream/18.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- sqlglot/parser.py 321
1 files changed, 250 insertions, 71 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 3db4453..f8690d5 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -136,6 +136,7 @@ class Parser(metaclass=_Parser):
TokenType.UINT128,
TokenType.INT256,
TokenType.UINT256,
+ TokenType.MEDIUMINT,
TokenType.FIXEDSTRING,
TokenType.FLOAT,
TokenType.DOUBLE,
@@ -186,6 +187,7 @@ class Parser(metaclass=_Parser):
TokenType.SMALLSERIAL,
TokenType.BIGSERIAL,
TokenType.XML,
+ TokenType.YEAR,
TokenType.UNIQUEIDENTIFIER,
TokenType.USERDEFINED,
TokenType.MONEY,
@@ -194,9 +196,12 @@ class Parser(metaclass=_Parser):
TokenType.IMAGE,
TokenType.VARIANT,
TokenType.OBJECT,
+ TokenType.OBJECT_IDENTIFIER,
TokenType.INET,
TokenType.IPADDRESS,
TokenType.IPPREFIX,
+ TokenType.UNKNOWN,
+ TokenType.NULL,
*ENUM_TYPE_TOKENS,
*NESTED_TYPE_TOKENS,
}
@@ -332,6 +337,7 @@ class Parser(metaclass=_Parser):
TokenType.INDEX,
TokenType.ISNULL,
TokenType.ILIKE,
+ TokenType.INSERT,
TokenType.LIKE,
TokenType.MERGE,
TokenType.OFFSET,
@@ -487,7 +493,7 @@ class Parser(metaclass=_Parser):
exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
exp.Column: lambda self: self._parse_column(),
exp.Condition: lambda self: self._parse_conjunction(),
- exp.DataType: lambda self: self._parse_types(),
+ exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
exp.Expression: lambda self: self._parse_statement(),
exp.From: lambda self: self._parse_from(),
exp.Group: lambda self: self._parse_group(),
@@ -523,9 +529,6 @@ class Parser(metaclass=_Parser):
TokenType.DESC: lambda self: self._parse_describe(),
TokenType.DESCRIBE: lambda self: self._parse_describe(),
TokenType.DROP: lambda self: self._parse_drop(),
- TokenType.FROM: lambda self: exp.select("*").from_(
- t.cast(exp.From, self._parse_from(skip_from_token=True))
- ),
TokenType.INSERT: lambda self: self._parse_insert(),
TokenType.LOAD: lambda self: self._parse_load(),
TokenType.MERGE: lambda self: self._parse_merge(),
@@ -578,7 +581,7 @@ class Parser(metaclass=_Parser):
TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
TokenType.PARAMETER: lambda self: self._parse_parameter(),
TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
- if self._match_set((TokenType.NUMBER, TokenType.VAR))
+ if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
else None,
}
@@ -593,6 +596,7 @@ class Parser(metaclass=_Parser):
TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
+ TokenType.FOR: lambda self, this: self._parse_comprehension(this),
}
PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
@@ -684,6 +688,12 @@ class Parser(metaclass=_Parser):
exp.CommentColumnConstraint, this=self._parse_string()
),
"COMPRESS": lambda self: self._parse_compress(),
+ "CLUSTERED": lambda self: self.expression(
+ exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
+ ),
+ "NONCLUSTERED": lambda self: self.expression(
+ exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
+ ),
"DEFAULT": lambda self: self.expression(
exp.DefaultColumnConstraint, this=self._parse_bitwise()
),
@@ -698,8 +708,11 @@ class Parser(metaclass=_Parser):
"LIKE": lambda self: self._parse_create_like(),
"NOT": lambda self: self._parse_not_constraint(),
"NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
- "ON": lambda self: self._match(TokenType.UPDATE)
- and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
+ "ON": lambda self: (
+ self._match(TokenType.UPDATE)
+ and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
+ )
+ or self.expression(exp.OnProperty, this=self._parse_id_var()),
"PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
"PRIMARY KEY": lambda self: self._parse_primary_key(),
"REFERENCES": lambda self: self._parse_references(match=False),
@@ -709,6 +722,9 @@ class Parser(metaclass=_Parser):
"TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
"UNIQUE": lambda self: self._parse_unique(),
"UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
+ "WITH": lambda self: self.expression(
+ exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
+ ),
}
ALTER_PARSERS = {
@@ -728,6 +744,11 @@ class Parser(metaclass=_Parser):
"NEXT": lambda self: self._parse_next_value_for(),
}
+ INVALID_FUNC_NAME_TOKENS = {
+ TokenType.IDENTIFIER,
+ TokenType.STRING,
+ }
+
FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
FUNCTION_PARSERS = {
@@ -774,6 +795,8 @@ class Parser(metaclass=_Parser):
self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
),
TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
+ TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
+ TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}
SET_PARSERS = {
@@ -815,6 +838,8 @@ class Parser(metaclass=_Parser):
ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
+ DISTINCT_TOKENS = {TokenType.DISTINCT}
+
STRICT_CAST = True
# A NULL arg in CONCAT yields NULL by default
@@ -826,6 +851,11 @@ class Parser(metaclass=_Parser):
LOG_BASE_FIRST = True
LOG_DEFAULTS_TO_LN = False
+ SUPPORTS_USER_DEFINED_TYPES = True
+
+ # Whether or not ADD is present for each column added by ALTER TABLE
+ ALTER_TABLE_ADD_COLUMN_KEYWORD = True
+
__slots__ = (
"error_level",
"error_message_context",
@@ -838,9 +868,11 @@ class Parser(metaclass=_Parser):
"_next",
"_prev",
"_prev_comments",
+ "_tokenizer",
)
# Autofilled
+ TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
INDEX_OFFSET: int = 0
UNNEST_COLUMN_ONLY: bool = False
ALIAS_POST_TABLESAMPLE: bool = False
@@ -863,6 +895,7 @@ class Parser(metaclass=_Parser):
self.error_level = error_level or ErrorLevel.IMMEDIATE
self.error_message_context = error_message_context
self.max_errors = max_errors
+ self._tokenizer = self.TOKENIZER_CLASS()
self.reset()
def reset(self):
@@ -1148,7 +1181,7 @@ class Parser(metaclass=_Parser):
expression = self._parse_set_operations(expression) if expression else self._parse_select()
return self._parse_query_modifiers(expression)
- def _parse_drop(self) -> exp.Drop | exp.Command:
+ def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
start = self._prev
temporary = self._match(TokenType.TEMPORARY)
materialized = self._match_text_seq("MATERIALIZED")
@@ -1160,7 +1193,7 @@ class Parser(metaclass=_Parser):
return self.expression(
exp.Drop,
comments=start.comments,
- exists=self._parse_exists(),
+ exists=exists or self._parse_exists(),
this=self._parse_table(schema=True),
kind=kind,
temporary=temporary,
@@ -1274,6 +1307,8 @@ class Parser(metaclass=_Parser):
if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
no_schema_binding = True
+ shallow = self._match_text_seq("SHALLOW")
+
if self._match_text_seq("CLONE"):
clone = self._parse_table(schema=True)
when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
@@ -1285,7 +1320,12 @@ class Parser(metaclass=_Parser):
clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
self._match(TokenType.R_PAREN)
clone = self.expression(
- exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
+ exp.Clone,
+ this=clone,
+ when=when,
+ kind=clone_kind,
+ shallow=shallow,
+ expression=clone_expression,
)
return self.expression(
@@ -1349,7 +1389,11 @@ class Parser(metaclass=_Parser):
if assignment:
key = self._parse_var_or_string()
self._match(TokenType.EQ)
- return self.expression(exp.Property, this=key, value=self._parse_column())
+ return self.expression(
+ exp.Property,
+ this=key,
+ value=self._parse_column() or self._parse_var(any_token=True),
+ )
return None
@@ -1409,7 +1453,7 @@ class Parser(metaclass=_Parser):
def _parse_with_property(
self,
- ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
+ ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
if self._match(TokenType.L_PAREN, advance=False):
return self._parse_wrapped_csv(self._parse_property)
@@ -1622,7 +1666,7 @@ class Parser(metaclass=_Parser):
override=override,
)
- def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
+ def _parse_partition_by(self) -> t.List[exp.Expression]:
if self._match(TokenType.PARTITION_BY):
return self._parse_csv(self._parse_conjunction)
return []
@@ -1652,9 +1696,9 @@ class Parser(metaclass=_Parser):
def _parse_on_property(self) -> t.Optional[exp.Expression]:
if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
return exp.OnCommitProperty()
- elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
+ if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
return exp.OnCommitProperty(delete=True)
- return None
+ return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
def _parse_distkey(self) -> exp.DistKeyProperty:
return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
@@ -1709,8 +1753,10 @@ class Parser(metaclass=_Parser):
def _parse_describe(self) -> exp.Describe:
kind = self._match_set(self.CREATABLES) and self._prev.text
- this = self._parse_table()
- return self.expression(exp.Describe, this=this, kind=kind)
+ this = self._parse_table(schema=True)
+ properties = self._parse_properties()
+ expressions = properties.expressions if properties else None
+ return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
def _parse_insert(self) -> exp.Insert:
comments = ensure_list(self._prev_comments)
@@ -1741,6 +1787,7 @@ class Parser(metaclass=_Parser):
exp.Insert,
comments=comments,
this=this,
+ by_name=self._match_text_seq("BY", "NAME"),
exists=self._parse_exists(),
partition=self._parse_partition(),
where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
@@ -1895,6 +1942,7 @@ class Parser(metaclass=_Parser):
"from": self._parse_from(joins=True),
"where": self._parse_where(),
"returning": returning or self._parse_returning(),
+ "order": self._parse_order(),
"limit": self._parse_limit(),
},
)
@@ -1948,13 +1996,14 @@ class Parser(metaclass=_Parser):
# https://prestodb.io/docs/current/sql/values.html
return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
- def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]:
+ def _parse_projections(self) -> t.List[exp.Expression]:
return self._parse_expressions()
def _parse_select(
self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
) -> t.Optional[exp.Expression]:
cte = self._parse_with()
+
if cte:
this = self._parse_statement()
@@ -1967,12 +2016,18 @@ class Parser(metaclass=_Parser):
else:
self.raise_error(f"{this.key} does not support CTE")
this = cte
- elif self._match(TokenType.SELECT):
+
+ return this
+
+ # duckdb supports leading with FROM x
+ from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None
+
+ if self._match(TokenType.SELECT):
comments = self._prev_comments
hint = self._parse_hint()
all_ = self._match(TokenType.ALL)
- distinct = self._match(TokenType.DISTINCT)
+ distinct = self._match_set(self.DISTINCT_TOKENS)
kind = (
self._match(TokenType.ALIAS)
@@ -2006,7 +2061,9 @@ class Parser(metaclass=_Parser):
if into:
this.set("into", into)
- from_ = self._parse_from()
+ if not from_:
+ from_ = self._parse_from()
+
if from_:
this.set("from", from_)
@@ -2033,6 +2090,8 @@ class Parser(metaclass=_Parser):
expressions=self._parse_csv(self._parse_value),
alias=self._parse_table_alias(),
)
+ elif from_:
+ this = exp.select("*").from_(from_.this, copy=False)
else:
this = None
@@ -2491,6 +2550,11 @@ class Parser(metaclass=_Parser):
if schema:
return self._parse_schema(this=this)
+ version = self._parse_version()
+
+ if version:
+ this.set("version", version)
+
if self.ALIAS_POST_TABLESAMPLE:
table_sample = self._parse_table_sample()
@@ -2498,11 +2562,11 @@ class Parser(metaclass=_Parser):
if alias:
this.set("alias", alias)
+ this.set("hints", self._parse_table_hints())
+
if not this.args.get("pivots"):
this.set("pivots", self._parse_pivots())
- this.set("hints", self._parse_table_hints())
-
if not self.ALIAS_POST_TABLESAMPLE:
table_sample = self._parse_table_sample()
@@ -2516,6 +2580,37 @@ class Parser(metaclass=_Parser):
return this
+ def _parse_version(self) -> t.Optional[exp.Version]:  # Parse a TIMESTAMP/VERSION snapshot clause into exp.Version; returns None when neither snapshot token is next.
+ if self._match(TokenType.TIMESTAMP_SNAPSHOT):
+ this = "TIMESTAMP"
+ elif self._match(TokenType.VERSION_SNAPSHOT):
+ this = "VERSION"
+ else:
+ return None
+
+ if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
+ kind = self._prev.text.upper()  # upper-cased text of the token just matched (FROM or BETWEEN)
+ start = self._parse_bitwise()
+ self._match_texts(("TO", "AND"))  # separator between the two bounds; the match result is not checked
+ end = self._parse_bitwise()
+ expression: t.Optional[exp.Expression] = self.expression(
+ exp.Tuple, expressions=[start, end]
+ )
+ elif self._match_text_seq("CONTAINED", "IN"):
+ kind = "CONTAINED IN"
+ expression = self.expression(
+ exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
+ )
+ elif self._match(TokenType.ALL):
+ kind = "ALL"
+ expression = None  # the ALL form carries no operand
+ else:
+ self._match_text_seq("AS", "OF")  # result not checked: kind is "AS OF" whether or not the text was present
+ kind = "AS OF"
+ expression = self._parse_type()
+
+ return self.expression(exp.Version, this=this, expression=expression, kind=kind)
+
def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
if not self._match(TokenType.UNNEST):
return None
@@ -2760,7 +2855,7 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Group, **elements) # type: ignore
- def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
+ def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
if not self._match(TokenType.GROUPING_SETS):
return None
@@ -2784,6 +2879,22 @@ class Parser(metaclass=_Parser):
return None
return self.expression(exp.Qualify, this=self._parse_conjunction())
+ def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:  # Parse an optional START WITH condition followed by a CONNECT BY condition into exp.Connect.
+ if skip_start_token:
+ start = None  # caller asked to skip the START WITH check, so there is no start condition
+ elif self._match(TokenType.START_WITH):
+ start = self._parse_conjunction()
+ else:
+ return None  # not skipping and no START WITH token: nothing to parse here
+
+ self._match(TokenType.CONNECT_BY)  # consumed if present; the match result is not checked
+ self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(  # PRIOR is registered only while the connect condition is parsed
+ exp.Prior, this=self._parse_bitwise()
+ )
+ connect = self._parse_conjunction()
+ self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")  # NOTE(review): skipped if _parse_conjunction() raises, leaving "PRIOR" registered; the dict is not in __slots__, so it is presumably class-level and shared - consider try/finally
+ return self.expression(exp.Connect, start=start, connect=connect)
+
def _parse_order(
self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
) -> t.Optional[exp.Expression]:
@@ -2929,6 +3040,7 @@ class Parser(metaclass=_Parser):
expression,
this=this,
distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
+ by_name=self._match_text_seq("BY", "NAME"),
expression=self._parse_set_operations(self._parse_select(nested=True)),
)
@@ -3017,6 +3129,8 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Escape, this=this, expression=self._parse_string())
def _parse_interval(self) -> t.Optional[exp.Interval]:
+ index = self._index
+
if not self._match(TokenType.INTERVAL):
return None
@@ -3025,7 +3139,11 @@ class Parser(metaclass=_Parser):
else:
this = self._parse_term()
- unit = self._parse_function() or self._parse_var()
+ if not this:
+ self._retreat(index)
+ return None
+
+ unit = self._parse_function() or self._parse_var(any_token=True)
# Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
# each INTERVAL expression into this canonical form so it's easy to transpile
@@ -3036,12 +3154,12 @@ class Parser(metaclass=_Parser):
if len(parts) == 2:
if unit:
- # this is not actually a unit, it's something else
+ # This is not actually a unit, it's something else (e.g. a "window side")
unit = None
self._retreat(self._index - 1)
- else:
- this = exp.Literal.string(parts[0])
- unit = self.expression(exp.Var, this=parts[1])
+
+ this = exp.Literal.string(parts[0])
+ unit = self.expression(exp.Var, this=parts[1])
return self.expression(exp.Interval, this=this, unit=unit)
@@ -3087,7 +3205,7 @@ class Parser(metaclass=_Parser):
return interval
index = self._index
- data_type = self._parse_types(check_func=True)
+ data_type = self._parse_types(check_func=True, allow_identifiers=False)
this = self._parse_column()
if data_type:
@@ -3103,30 +3221,50 @@ class Parser(metaclass=_Parser):
return this
- def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
+ def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
this = self._parse_type()
if not this:
return None
return self.expression(
- exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
+ exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
)
def _parse_types(
- self, check_func: bool = False, schema: bool = False
+ self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
index = self._index
prefix = self._match_text_seq("SYSUDTLIB", ".")
if not self._match_set(self.TYPE_TOKENS):
- return None
+ identifier = allow_identifiers and self._parse_id_var(
+ any_token=False, tokens=(TokenType.VAR,)
+ )
+
+ if identifier:
+ tokens = self._tokenizer.tokenize(identifier.name)
+
+ if len(tokens) != 1:
+ self.raise_error("Unexpected identifier", self._prev)
+
+ if tokens[0].token_type in self.TYPE_TOKENS:
+ self._prev = tokens[0]
+ elif self.SUPPORTS_USER_DEFINED_TYPES:
+ return identifier
+ else:
+ return None
+ else:
+ return None
type_token = self._prev.token_type
if type_token == TokenType.PSEUDO_TYPE:
return self.expression(exp.PseudoType, this=self._prev.text)
+ if type_token == TokenType.OBJECT_IDENTIFIER:
+ return self.expression(exp.ObjectIdentifier, this=self._prev.text)
+
nested = type_token in self.NESTED_TYPE_TOKENS
is_struct = type_token in self.STRUCT_TYPE_TOKENS
expressions = None
@@ -3137,7 +3275,9 @@ class Parser(metaclass=_Parser):
expressions = self._parse_csv(self._parse_struct_types)
elif nested:
expressions = self._parse_csv(
- lambda: self._parse_types(check_func=check_func, schema=schema)
+ lambda: self._parse_types(
+ check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
+ )
)
elif type_token in self.ENUM_TYPE_TOKENS:
expressions = self._parse_csv(self._parse_equality)
@@ -3151,14 +3291,16 @@ class Parser(metaclass=_Parser):
maybe_func = True
this: t.Optional[exp.Expression] = None
- values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
+ values: t.Optional[t.List[exp.Expression]] = None
if nested and self._match(TokenType.LT):
if is_struct:
expressions = self._parse_csv(self._parse_struct_types)
else:
expressions = self._parse_csv(
- lambda: self._parse_types(check_func=check_func, schema=schema)
+ lambda: self._parse_types(
+ check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
+ )
)
if not self._match(TokenType.GT):
@@ -3355,7 +3497,7 @@ class Parser(metaclass=_Parser):
upper = this.upper()
parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
- if optional_parens and parser:
+ if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
self._advance()
return parser(self)
@@ -3442,7 +3584,9 @@ class Parser(metaclass=_Parser):
index = self._index
if self._match(TokenType.L_PAREN):
- expressions = self._parse_csv(self._parse_id_var)
+ expressions = t.cast(
+ t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
+ )
if not self._match(TokenType.R_PAREN):
self._retreat(index)
@@ -3481,14 +3625,14 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.L_PAREN):
return this
- args = self._parse_csv(
- lambda: self._parse_constraint()
- or self._parse_column_def(self._parse_field(any_token=True))
- )
+ args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
self._match_r_paren()
return self.expression(exp.Schema, this=this, expressions=args)
+ def _parse_field_def(self) -> t.Optional[exp.Expression]:  # Parse a field (any token accepted) and hand it to _parse_column_def as the column definition's subject.
+ return self._parse_column_def(self._parse_field(any_token=True))
+
def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
# column defs are not really columns, they're identifiers
if isinstance(this, exp.Column):
@@ -3499,7 +3643,18 @@ class Parser(metaclass=_Parser):
if self._match_text_seq("FOR", "ORDINALITY"):
return self.expression(exp.ColumnDef, this=this, ordinality=True)
- constraints = []
+ constraints: t.List[exp.Expression] = []
+
+ if not kind and self._match(TokenType.ALIAS):
+ constraints.append(
+ self.expression(
+ exp.ComputedColumnConstraint,
+ this=self._parse_conjunction(),
+ persisted=self._match_text_seq("PERSISTED"),
+ not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
+ )
+ )
+
while True:
constraint = self._parse_column_constraint()
if not constraint:
@@ -3553,7 +3708,7 @@ class Parser(metaclass=_Parser):
identity = self._match_text_seq("IDENTITY")
if self._match(TokenType.L_PAREN):
- if self._match_text_seq("START", "WITH"):
+ if self._match(TokenType.START_WITH):
this.set("start", self._parse_bitwise())
if self._match_text_seq("INCREMENT", "BY"):
this.set("increment", self._parse_bitwise())
@@ -3580,11 +3735,13 @@ class Parser(metaclass=_Parser):
def _parse_not_constraint(
self,
- ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
+ ) -> t.Optional[exp.Expression]:
if self._match_text_seq("NULL"):
return self.expression(exp.NotNullColumnConstraint)
if self._match_text_seq("CASESPECIFIC"):
return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
+ if self._match_text_seq("FOR", "REPLICATION"):
+ return self.expression(exp.NotForReplicationColumnConstraint)
return None
def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
@@ -3729,7 +3886,7 @@ class Parser(metaclass=_Parser):
bracket_kind = self._prev.token_type
if self._match(TokenType.COLON):
- expressions: t.List[t.Optional[exp.Expression]] = [
+ expressions: t.List[exp.Expression] = [
self.expression(exp.Slice, expression=self._parse_conjunction())
]
else:
@@ -3844,17 +4001,17 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.ALIAS):
if self._match(TokenType.COMMA):
- return self.expression(
- exp.CastToStrType, this=this, expression=self._parse_string()
- )
- else:
- self.raise_error("Expected AS after CAST")
+ return self.expression(exp.CastToStrType, this=this, to=self._parse_string())
+
+ self.raise_error("Expected AS after CAST")
fmt = None
to = self._parse_types()
if not to:
self.raise_error("Expected TYPE after CAST")
+ elif isinstance(to, exp.Identifier):
+ to = exp.DataType.build(to.name, udt=True)
elif to.this == exp.DataType.Type.CHAR:
if self._match(TokenType.CHARACTER_SET):
to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
@@ -3908,7 +4065,7 @@ class Parser(metaclass=_Parser):
if self._match(TokenType.COMMA):
args.extend(self._parse_csv(self._parse_conjunction))
else:
- args = self._parse_csv(self._parse_conjunction)
+ args = self._parse_csv(self._parse_conjunction) # type: ignore
index = self._index
if not self._match(TokenType.R_PAREN) and args:
@@ -3991,10 +4148,10 @@ class Parser(metaclass=_Parser):
def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
self._match_text_seq("KEY")
- key = self._parse_field()
- self._match(TokenType.COLON)
+ key = self._parse_column()
+ self._match_set((TokenType.COLON, TokenType.COMMA))
self._match_text_seq("VALUE")
- value = self._parse_field()
+ value = self._parse_bitwise()
if not key and not value:
return None
@@ -4116,7 +4273,7 @@ class Parser(metaclass=_Parser):
# Postgres supports the form: substring(string [from int] [for int])
# https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
- args = self._parse_csv(self._parse_bitwise)
+ args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))
if self._match(TokenType.FROM):
args.append(self._parse_bitwise())
@@ -4149,7 +4306,7 @@ class Parser(metaclass=_Parser):
exp.Trim, this=this, position=position, expression=expression, collation=collation
)
- def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
+ def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
def _parse_named_window(self) -> t.Optional[exp.Expression]:
@@ -4216,8 +4373,7 @@ class Parser(metaclass=_Parser):
if self._match_text_seq("LAST"):
first = False
- partition = self._parse_partition_by()
- order = self._parse_order()
+ partition, order = self._parse_partition_and_order()
kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
if kind:
@@ -4256,6 +4412,11 @@ class Parser(metaclass=_Parser):
return window
+ def _parse_partition_and_order(
+ self,
+ ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:  # Returns a (partition, order) pair for the caller to unpack.
+ return self._parse_partition_by(), self._parse_order()  # tuple elements evaluate left to right, so partition is parsed before order
+
def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
self._match(TokenType.BETWEEN)
@@ -4377,14 +4538,14 @@ class Parser(metaclass=_Parser):
self._advance(-1)
return None
- def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
+ def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
if not self._match(TokenType.EXCEPT):
return None
if self._match(TokenType.L_PAREN, advance=False):
return self._parse_wrapped_csv(self._parse_column)
return self._parse_csv(self._parse_column)
- def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
+ def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
if not self._match(TokenType.REPLACE):
return None
if self._match(TokenType.L_PAREN, advance=False):
@@ -4393,7 +4554,7 @@ class Parser(metaclass=_Parser):
def _parse_csv(
self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
- ) -> t.List[t.Optional[exp.Expression]]:
+ ) -> t.List[exp.Expression]:
parse_result = parse_method()
items = [parse_result] if parse_result is not None else []
@@ -4420,12 +4581,12 @@ class Parser(metaclass=_Parser):
return this
- def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
+ def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
def _parse_wrapped_csv(
self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
- ) -> t.List[t.Optional[exp.Expression]]:
+ ) -> t.List[exp.Expression]:
return self._parse_wrapped(
lambda: self._parse_csv(parse_method, sep=sep), optional=optional
)
@@ -4439,7 +4600,7 @@ class Parser(metaclass=_Parser):
self._match_r_paren()
return parse_result
- def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
+ def _parse_expressions(self) -> t.List[exp.Expression]:
return self._parse_csv(self._parse_expression)
def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
@@ -4498,7 +4659,7 @@ class Parser(metaclass=_Parser):
self._match(TokenType.COLUMN)
exists_column = self._parse_exists(not_=True)
- expression = self._parse_column_def(self._parse_field(any_token=True))
+ expression = self._parse_field_def()
if expression:
expression.set("exists", exists_column)
@@ -4549,13 +4710,16 @@ class Parser(metaclass=_Parser):
return self.expression(exp.AddConstraint, this=this, expression=expression)
- def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
+ def _parse_alter_table_add(self) -> t.List[exp.Expression]:
index = self._index - 1
if self._match_set(self.ADD_CONSTRAINT_TOKENS):
return self._parse_csv(self._parse_add_constraint)
self._retreat(index)
+ if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
+ return self._parse_csv(self._parse_field_def)
+
return self._parse_csv(self._parse_add_column)
def _parse_alter_table_alter(self) -> exp.AlterColumn:
@@ -4576,7 +4740,7 @@ class Parser(metaclass=_Parser):
using=self._match(TokenType.USING) and self._parse_conjunction(),
)
- def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
+ def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
index = self._index - 1
partition_exists = self._parse_exists()
@@ -4619,6 +4783,9 @@ class Parser(metaclass=_Parser):
self._match(TokenType.INTO)
target = self._parse_table()
+ if target and self._match(TokenType.ALIAS, advance=False):
+ target.set("alias", self._parse_table_alias())
+
self._match(TokenType.USING)
using = self._parse_table()
@@ -4685,8 +4852,7 @@ class Parser(metaclass=_Parser):
parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
if parser:
return parser(self)
- self._advance()
- return self.expression(exp.Show, this=self._prev.text.upper())
+ return self._parse_as_command(self._prev)
def _parse_set_item_assignment(
self, kind: t.Optional[str] = None
@@ -4786,6 +4952,19 @@ class Parser(metaclass=_Parser):
self._match_r_paren()
return self.expression(exp.DictRange, this=this, min=min, max=max)
+ def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:  # Build exp.Comprehension around `this`; dispatched from the TokenType.FOR table entry, so FOR is presumably already consumed - confirm.
+ expression = self._parse_column()
+ self._match(TokenType.IN)  # IN is consumed if present; its absence is not reported
+ iterator = self._parse_column()
+ condition = self._parse_conjunction() if self._match_text_seq("IF") else None  # optional trailing IF <condition>
+ return self.expression(
+ exp.Comprehension,
+ this=this,
+ expression=expression,
+ iterator=iterator,
+ condition=condition,
+ )
+
def _find_parser(
self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]: