summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/parser.py')
-rw-r--r-- sqlglot/parser.py | 121
1 file changed, 65 insertions, 56 deletions
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index abb23ad..d8d9f88 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -126,9 +126,17 @@ class Parser(metaclass=_Parser):
TokenType.BIT,
TokenType.BOOLEAN,
TokenType.TINYINT,
+ TokenType.UTINYINT,
TokenType.SMALLINT,
+ TokenType.USMALLINT,
TokenType.INT,
+ TokenType.UINT,
TokenType.BIGINT,
+ TokenType.UBIGINT,
+ TokenType.INT128,
+ TokenType.UINT128,
+ TokenType.INT256,
+ TokenType.UINT256,
TokenType.FLOAT,
TokenType.DOUBLE,
TokenType.CHAR,
@@ -961,14 +969,15 @@ class Parser(metaclass=_Parser):
The target expression.
"""
instance = exp_class(**kwargs)
- if self._prev_comments:
- instance.comments = self._prev_comments
- self._prev_comments = None
- if comments:
- instance.comments = comments
+ instance.add_comments(comments) if comments else self._add_comments(instance)
self.validate_expression(instance)
return instance
+ def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
+ if expression and self._prev_comments:
+ expression.add_comments(self._prev_comments)
+ self._prev_comments = None
+
def validate_expression(
self, expression: exp.Expression, args: t.Optional[t.List] = None
) -> None:
@@ -1567,7 +1576,7 @@ class Parser(metaclass=_Parser):
value = self.expression(
exp.Schema,
this="TABLE",
- expressions=self._parse_csv(self._parse_struct_kwargs),
+ expressions=self._parse_csv(self._parse_struct_types),
)
if not self._match(TokenType.GT):
self.raise_error("Expecting >")
@@ -1802,14 +1811,15 @@ class Parser(metaclass=_Parser):
elif self._match(TokenType.SELECT):
comments = self._prev_comments
+ hint = self._parse_hint()
+ all_ = self._match(TokenType.ALL)
+ distinct = self._match(TokenType.DISTINCT)
+
kind = (
self._match(TokenType.ALIAS)
and self._match_texts(("STRUCT", "VALUE"))
and self._prev.text
)
- hint = self._parse_hint()
- all_ = self._match(TokenType.ALL)
- distinct = self._match(TokenType.DISTINCT)
if distinct:
distinct = self.expression(
@@ -2284,7 +2294,7 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.UNNEST):
return None
- expressions = self._parse_wrapped_csv(self._parse_column)
+ expressions = self._parse_wrapped_csv(self._parse_type)
ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
alias = self._parse_table_alias()
@@ -2333,7 +2343,9 @@ class Parser(metaclass=_Parser):
size = None
seed = None
- kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
+ kind = (
+ self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
+ )
method = self._parse_var(tokens=(TokenType.ROW,))
self._match(TokenType.L_PAREN)
@@ -2684,7 +2696,7 @@ class Parser(metaclass=_Parser):
else:
this = self.expression(exp.In, this=this, expressions=expressions)
- self._match_r_paren()
+ self._match_r_paren(this)
else:
this = self.expression(exp.In, this=this, field=self._parse_field())
@@ -2798,7 +2810,7 @@ class Parser(metaclass=_Parser):
if self._match(TokenType.L_PAREN):
if is_struct:
- expressions = self._parse_csv(self._parse_struct_kwargs)
+ expressions = self._parse_csv(self._parse_struct_types)
elif nested:
expressions = self._parse_csv(self._parse_types)
else:
@@ -2833,7 +2845,7 @@ class Parser(metaclass=_Parser):
values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
if nested and self._match(TokenType.LT):
if is_struct:
- expressions = self._parse_csv(self._parse_struct_kwargs)
+ expressions = self._parse_csv(self._parse_struct_types)
else:
expressions = self._parse_csv(self._parse_types)
@@ -2891,16 +2903,10 @@ class Parser(metaclass=_Parser):
prefix=prefix,
)
- def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
- index = self._index
- this = self._parse_id_var()
+ def _parse_struct_types(self) -> t.Optional[exp.Expression]:
+ this = self._parse_type() or self._parse_id_var()
self._match(TokenType.COLON)
- data_type = self._parse_types()
-
- if not data_type:
- self._retreat(index)
- return self._parse_types()
- return self.expression(exp.StructKwarg, this=this, expression=data_type)
+ return self._parse_column_def(this)
def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
if not self._match(TokenType.AT_TIME_ZONE):
@@ -2932,7 +2938,11 @@ class Parser(metaclass=_Parser):
else exp.Literal.string(value)
)
else:
- field = self._parse_star() or self._parse_function() or self._parse_id_var()
+ field = (
+ self._parse_star()
+ or self._parse_function(anonymous=True)
+ or self._parse_id_var()
+ )
if isinstance(field, exp.Func):
# bigquery allows function calls like x.y.count(...)
@@ -2995,11 +3005,9 @@ class Parser(metaclass=_Parser):
else:
this = self.expression(exp.Paren, this=self._parse_set_operations(this))
- self._match_r_paren()
- comments.extend(self._prev_comments)
-
- if this and comments:
- this.comments = comments
+ if this:
+ this.add_comments(comments)
+ self._match_r_paren(expression=this)
return this
@@ -3017,7 +3025,7 @@ class Parser(metaclass=_Parser):
)
def _parse_function(
- self, functions: t.Optional[t.Dict[str, t.Callable]] = None
+ self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
) -> t.Optional[exp.Expression]:
if not self._curr:
return None
@@ -3043,7 +3051,7 @@ class Parser(metaclass=_Parser):
parser = self.FUNCTION_PARSERS.get(upper)
- if parser:
+ if parser and not anonymous:
this = parser(self)
else:
subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)
@@ -3059,7 +3067,7 @@ class Parser(metaclass=_Parser):
function = functions.get(upper)
args = self._parse_csv(self._parse_lambda)
- if function:
+ if function and not anonymous:
# Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
# second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
if count_params(function) == 2:
@@ -3148,12 +3156,7 @@ class Parser(metaclass=_Parser):
if isinstance(left, exp.Column):
left.replace(exp.Var(this=left.text("this")))
- if self._match(TokenType.IGNORE_NULLS):
- this = self.expression(exp.IgnoreNulls, this=this)
- else:
- self._match(TokenType.RESPECT_NULLS)
-
- return self._parse_limit(self._parse_order(this))
+ return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
index = self._index
@@ -3177,6 +3180,9 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Schema, this=this, expressions=args)
def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ # column defs are not really columns, they're identifiers
+ if isinstance(this, exp.Column):
+ this = this.this
kind = self._parse_types()
if self._match_text_seq("FOR", "ORDINALITY"):
@@ -3420,7 +3426,7 @@ class Parser(metaclass=_Parser):
elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
self.raise_error("Expected }")
- this.comments = self._prev_comments
+ self._add_comments(this)
return self._parse_bracket(this)
def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
@@ -3584,7 +3590,9 @@ class Parser(metaclass=_Parser):
exp.and_(
exp.Is(this=expression.copy(), expression=exp.Null()),
exp.Is(this=search.copy(), expression=exp.Null()),
+ copy=False,
),
+ copy=False,
)
ifs.append(exp.If(this=cond, true=result))
@@ -3717,15 +3725,15 @@ class Parser(metaclass=_Parser):
if self._match_set(self.TRIM_TYPES):
position = self._prev.text.upper()
- expression = self._parse_term()
+ expression = self._parse_bitwise()
if self._match_set((TokenType.FROM, TokenType.COMMA)):
- this = self._parse_term()
+ this = self._parse_bitwise()
else:
this = expression
expression = None
if self._match(TokenType.COLLATE):
- collation = self._parse_term()
+ collation = self._parse_bitwise()
return self.expression(
exp.Trim,
@@ -3741,6 +3749,15 @@ class Parser(metaclass=_Parser):
def _parse_named_window(self) -> t.Optional[exp.Expression]:
return self._parse_window(self._parse_id_var(), alias=True)
+ def _parse_respect_or_ignore_nulls(
+ self, this: t.Optional[exp.Expression]
+ ) -> t.Optional[exp.Expression]:
+ if self._match(TokenType.IGNORE_NULLS):
+ return self.expression(exp.IgnoreNulls, this=this)
+ if self._match(TokenType.RESPECT_NULLS):
+ return self.expression(exp.RespectNulls, this=this)
+ return this
+
def _parse_window(
self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
@@ -3768,10 +3785,7 @@ class Parser(metaclass=_Parser):
# (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
# and Snowflake chose to do the same for familiarity
# https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
- if self._match(TokenType.IGNORE_NULLS):
- this = self.expression(exp.IgnoreNulls, this=this)
- elif self._match(TokenType.RESPECT_NULLS):
- this = self.expression(exp.RespectNulls, this=this)
+ this = self._parse_respect_or_ignore_nulls(this)
# bigquery select from window x AS (partition by ...)
if alias:
@@ -3975,9 +3989,7 @@ class Parser(metaclass=_Parser):
items = [parse_result] if parse_result is not None else []
while self._match(sep):
- if parse_result and self._prev_comments:
- parse_result.comments = self._prev_comments
-
+ self._add_comments(parse_result)
parse_result = parse_method()
if parse_result is not None:
items.append(parse_result)
@@ -4345,13 +4357,14 @@ class Parser(metaclass=_Parser):
self._retreat(index)
return None
- def _match(self, token_type, advance=True):
+ def _match(self, token_type, advance=True, expression=None):
if not self._curr:
return None
if self._curr.token_type == token_type:
if advance:
self._advance()
+ self._add_comments(expression)
return True
return None
@@ -4379,16 +4392,12 @@ class Parser(metaclass=_Parser):
return None
def _match_l_paren(self, expression=None):
- if not self._match(TokenType.L_PAREN):
+ if not self._match(TokenType.L_PAREN, expression=expression):
self.raise_error("Expecting (")
- if expression and self._prev_comments:
- expression.comments = self._prev_comments
def _match_r_paren(self, expression=None):
- if not self._match(TokenType.R_PAREN):
+ if not self._match(TokenType.R_PAREN, expression=expression):
self.raise_error("Expecting )")
- if expression and self._prev_comments:
- expression.comments = self._prev_comments
def _match_texts(self, texts, advance=True):
if self._curr and self._curr.text.upper() in texts: