From f1c2dbe3b17a0d5edffbb65b85b642d0bb2756c5 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 19 Dec 2023 12:01:55 +0100 Subject: Merging upstream version 20.3.0. Signed-off-by: Daniel Baumann --- sqlglot/dialects/presto.py | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'sqlglot/dialects/presto.py') diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index 10a6074..360ab65 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -186,6 +186,27 @@ def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str return "" +def _to_int(expression: exp.Expression) -> exp.Expression: + if not expression.type: + from sqlglot.optimizer.annotate_types import annotate_types + + annotate_types(expression) + if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES: + return exp.cast(expression, to=exp.DataType.Type.BIGINT) + return expression + + +def _parse_to_char(args: t.List) -> exp.TimeToStr: + fmt = seq_get(args, 1) + if isinstance(fmt, exp.Literal): + # We uppercase this to match Teradata's format mapping keys + fmt.set("this", fmt.this.upper()) + + # We use "teradata" on purpose here, because the time formats are different in Presto. + # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char + return format_time_lambda(exp.TimeToStr, "teradata")(args) + + class Presto(Dialect): INDEX_OFFSET = 1 NULL_ORDERING = "nulls_are_last" @@ -201,6 +222,12 @@ class Presto(Dialect): NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE class Tokenizer(tokens.Tokenizer): + UNICODE_STRINGS = [ + (prefix + q, q) + for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) + for prefix in ("U&", "u&") + ] + KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "START": TokenType.BEGIN, @@ -253,8 +280,9 @@ class Presto(Dialect): "STRPOS": lambda args: exp.StrPosition( this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) ), - "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, + "TO_CHAR": _parse_to_char, "TO_HEX": exp.Hex.from_arg_list, + "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, "TO_UTF8": lambda args: exp.Encode( this=seq_get(args, 0), charset=exp.Literal.string("utf-8") ), @@ -315,7 +343,12 @@ class Presto(Dialect): exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.DateAdd: lambda self, e: self.func( - "DATE_ADD", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this + "DATE_ADD", + exp.Literal.string(e.text("unit") or "day"), + _to_int( + e.expression, + ), + e.this, ), exp.DateDiff: lambda self, e: self.func( "DATE_DIFF", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this @@ -325,7 +358,7 @@ class Presto(Dialect): exp.DateSub: lambda self, e: self.func( "DATE_ADD", exp.Literal.string(e.text("unit") or "day"), - e.expression * -1, + _to_int(e.expression * -1), e.this, ), exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), @@ -354,6 +387,7 @@ class Presto(Dialect): exp.Right: right_to_substring_sql, exp.SafeDivide: no_safe_divide_sql, exp.Schema: _schema_sql, + exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), exp.Select: transforms.preprocess( [ transforms.eliminate_qualify, @@ -377,6 +411,7 @@ class Presto(Dialect): exp.TimeStrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.TIME_FORMAT}))", exp.TimeToStr: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("TO_UNIXTIME"), + exp.ToChar: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _ts_or_ds_add_sql, -- cgit v1.2.3