summaryrefslogtreecommitdiffstats
path: root/sqlglot/parser.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--  sqlglot/parser.py  87
1 file changed, 66 insertions(+), 21 deletions(-)
diff --git a/sqlglot/parser.py b/sqlglot/parser.py
index 84b2639..5e56961 100644
--- a/sqlglot/parser.py
+++ b/sqlglot/parser.py
@@ -178,6 +178,7 @@ class Parser(metaclass=_Parser):
TokenType.DATERANGE,
TokenType.DATEMULTIRANGE,
TokenType.DECIMAL,
+ TokenType.UDECIMAL,
TokenType.BIGDECIMAL,
TokenType.UUID,
TokenType.GEOGRAPHY,
@@ -215,6 +216,7 @@ class Parser(metaclass=_Parser):
TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
TokenType.SMALLINT: TokenType.USMALLINT,
TokenType.TINYINT: TokenType.UTINYINT,
+ TokenType.DECIMAL: TokenType.UDECIMAL,
}
SUBQUERY_PREDICATES = {
@@ -338,6 +340,7 @@ class Parser(metaclass=_Parser):
TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
FUNC_TOKENS = {
+ TokenType.COLLATE,
TokenType.COMMAND,
TokenType.CURRENT_DATE,
TokenType.CURRENT_DATETIME,
@@ -590,6 +593,9 @@ class Parser(metaclass=_Parser):
exp.National, this=token.text
),
TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
+ TokenType.HEREDOC_STRING: lambda self, token: self.expression(
+ exp.RawString, this=token.text
+ ),
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
}
@@ -666,6 +672,9 @@ class Parser(metaclass=_Parser):
"RETURNS": lambda self: self._parse_returns(),
"ROW": lambda self: self._parse_row(),
"ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
+ "SAMPLE": lambda self: self.expression(
+ exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
+ ),
"SET": lambda self: self.expression(exp.SetProperty, multi=False),
"SETTINGS": lambda self: self.expression(
exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
@@ -847,8 +856,11 @@ class Parser(metaclass=_Parser):
INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
+ CLONE_KEYWORDS = {"CLONE", "COPY"}
CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
+ OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
+
TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}
WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
@@ -863,6 +875,8 @@ class Parser(metaclass=_Parser):
NULL_TOKENS = {TokenType.NULL}
+ UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
+
STRICT_CAST = True
# A NULL arg in CONCAT yields NULL by default
@@ -880,9 +894,12 @@ class Parser(metaclass=_Parser):
# Whether or not the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False
- # Whether or not the SET command needs a delimiter (e.g. "=") for assignments.
+ # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True
+ # Whether the TRIM function expects the characters to trim as its first argument
+ TRIM_PATTERN_FIRST = False
+
__slots__ = (
"error_level",
"error_message_context",
@@ -1268,6 +1285,7 @@ class Parser(metaclass=_Parser):
indexes = None
no_schema_binding = None
begin = None
+ end = None
clone = None
def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
@@ -1299,6 +1317,8 @@ class Parser(metaclass=_Parser):
else:
expression = self._parse_statement()
+ end = self._match_text_seq("END")
+
if return_:
expression = self.expression(exp.Return, this=expression)
elif create_token.token_type == TokenType.INDEX:
@@ -1344,7 +1364,8 @@ class Parser(metaclass=_Parser):
shallow = self._match_text_seq("SHALLOW")
- if self._match_text_seq("CLONE"):
+ if self._match_texts(self.CLONE_KEYWORDS):
+ copy = self._prev.text.lower() == "copy"
clone = self._parse_table(schema=True)
when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
clone_kind = (
@@ -1361,6 +1382,7 @@ class Parser(metaclass=_Parser):
kind=clone_kind,
shallow=shallow,
expression=clone_expression,
+ copy=copy,
)
return self.expression(
@@ -1376,6 +1398,7 @@ class Parser(metaclass=_Parser):
indexes=indexes,
no_schema_binding=no_schema_binding,
begin=begin,
+ end=end,
clone=clone,
)
@@ -2445,21 +2468,32 @@ class Parser(metaclass=_Parser):
kwargs["using"] = self._parse_wrapped_id_vars()
elif not (kind and kind.token_type == TokenType.CROSS):
index = self._index
- joins = self._parse_joins()
+ join = self._parse_join()
- if joins and self._match(TokenType.ON):
+ if join and self._match(TokenType.ON):
kwargs["on"] = self._parse_conjunction()
- elif joins and self._match(TokenType.USING):
+ elif join and self._match(TokenType.USING):
kwargs["using"] = self._parse_wrapped_id_vars()
else:
- joins = None
+ join = None
self._retreat(index)
- kwargs["this"].set("joins", joins)
+ kwargs["this"].set("joins", [join] if join else None)
comments = [c for token in (method, side, kind) if token for c in token.comments]
return self.expression(exp.Join, comments=comments, **kwargs)
+ def _parse_opclass(self) -> t.Optional[exp.Expression]:
+ this = self._parse_conjunction()
+ if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
+ return this
+
+ opclass = self._parse_var(any_token=True)
+ if opclass:
+ return self.expression(exp.Opclass, this=this, expression=opclass)
+
+ return this
+
def _parse_index(
self,
index: t.Optional[exp.Expression] = None,
@@ -2486,7 +2520,7 @@ class Parser(metaclass=_Parser):
using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
if self._match(TokenType.L_PAREN, advance=False):
- columns = self._parse_wrapped_csv(self._parse_ordered)
+ columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
else:
columns = None
@@ -2677,7 +2711,9 @@ class Parser(metaclass=_Parser):
if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
self._match(TokenType.ALIAS)
- offset = self._parse_id_var() or exp.to_identifier("offset")
+ offset = self._parse_id_var(
+ any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
+ ) or exp.to_identifier("offset")
return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
@@ -2715,14 +2751,18 @@ class Parser(metaclass=_Parser):
)
method = self._parse_var(tokens=(TokenType.ROW,))
- self._match(TokenType.L_PAREN)
+ matched_l_paren = self._match(TokenType.L_PAREN)
if self.TABLESAMPLE_CSV:
num = None
expressions = self._parse_csv(self._parse_primary)
else:
expressions = None
- num = self._parse_primary()
+ num = (
+ self._parse_factor()
+ if self._match(TokenType.NUMBER, advance=False)
+ else self._parse_primary()
+ )
if self._match_text_seq("BUCKET"):
bucket_numerator = self._parse_number()
@@ -2737,7 +2777,8 @@ class Parser(metaclass=_Parser):
elif num:
size = num
- self._match(TokenType.R_PAREN)
+ if matched_l_paren:
+ self._match_r_paren()
if self._match(TokenType.L_PAREN):
method = self._parse_var()
@@ -2965,8 +3006,8 @@ class Parser(metaclass=_Parser):
return None
return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
- def _parse_ordered(self) -> exp.Ordered:
- this = self._parse_conjunction()
+ def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
+ this = parse_method() if parse_method else self._parse_conjunction()
asc = self._match(TokenType.ASC)
desc = self._match(TokenType.DESC) or (asc and False)
@@ -3144,7 +3185,7 @@ class Parser(metaclass=_Parser):
if self._match_text_seq("DISTINCT", "FROM"):
klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
- return self.expression(klass, this=this, expression=self._parse_expression())
+ return self.expression(klass, this=this, expression=self._parse_conjunction())
expression = self._parse_null() or self._parse_boolean()
if not expression:
@@ -3760,7 +3801,9 @@ class Parser(metaclass=_Parser):
return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
- def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
+ def _parse_generated_as_identity(
+ self,
+ ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
if self._match_text_seq("BY", "DEFAULT"):
on_null = self._match_pair(TokenType.ON, TokenType.NULL)
this = self.expression(
@@ -4382,16 +4425,18 @@ class Parser(metaclass=_Parser):
position = None
collation = None
+ expression = None
if self._match_texts(self.TRIM_TYPES):
position = self._prev.text.upper()
- expression = self._parse_bitwise()
+ this = self._parse_bitwise()
if self._match_set((TokenType.FROM, TokenType.COMMA)):
- this = self._parse_bitwise()
- else:
- this = expression
- expression = None
+ invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
+ expression = self._parse_bitwise()
+
+ if invert_order:
+ this, expression = expression, this
if self._match(TokenType.COLLATE):
collation = self._parse_bitwise()