
sqlglot.dialects.presto

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens, transforms
  6from sqlglot.dialects.dialect import (
  7    Dialect,
  8    date_trunc_to_time,
  9    format_time_lambda,
 10    if_sql,
 11    left_to_substring_sql,
 12    no_ilike_sql,
 13    no_pivot_sql,
 14    no_safe_divide_sql,
 15    rename_func,
 16    right_to_substring_sql,
 17    struct_extract_sql,
 18    timestamptrunc_sql,
 19    timestrtotime_sql,
 20)
 21from sqlglot.dialects.mysql import MySQL
 22from sqlglot.errors import UnsupportedError
 23from sqlglot.helper import seq_get
 24from sqlglot.tokens import TokenType
 25
 26
 27def _approx_distinct_sql(self: generator.Generator, expression: exp.ApproxDistinct) -> str:
 28    accuracy = expression.args.get("accuracy")
 29    accuracy = ", " + self.sql(accuracy) if accuracy else ""
 30    return f"APPROX_DISTINCT({self.sql(expression, 'this')}{accuracy})"
 31
 32
 33def _datatype_sql(self: generator.Generator, expression: exp.DataType) -> str:
 34    sql = self.datatype_sql(expression)
 35    if expression.is_type("timestamptz"):
 36        sql = f"{sql} WITH TIME ZONE"
 37    return sql
 38
 39
 40def _explode_to_unnest_sql(self: generator.Generator, expression: exp.Lateral) -> str:
 41    if isinstance(expression.this, (exp.Explode, exp.Posexplode)):
 42        return self.sql(
 43            exp.Join(
 44                this=exp.Unnest(
 45                    expressions=[expression.this.this],
 46                    alias=expression.args.get("alias"),
 47                    ordinality=isinstance(expression.this, exp.Posexplode),
 48                ),
 49                kind="cross",
 50            )
 51        )
 52    return self.lateral_sql(expression)
 53
 54
 55def _initcap_sql(self: generator.Generator, expression: exp.Initcap) -> str:
 56    regex = r"(\w)(\w*)"
 57    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"
 58
 59
 60def _decode_sql(self: generator.Generator, expression: exp.Decode) -> str:
 61    _ensure_utf8(expression.args["charset"])
 62    return self.func("FROM_UTF8", expression.this, expression.args.get("replace"))
 63
 64
 65def _encode_sql(self: generator.Generator, expression: exp.Encode) -> str:
 66    _ensure_utf8(expression.args["charset"])
 67    return f"TO_UTF8({self.sql(expression, 'this')})"
 68
 69
 70def _no_sort_array(self: generator.Generator, expression: exp.SortArray) -> str:
 71    if expression.args.get("asc") == exp.false():
 72        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
 73    else:
 74        comparator = None
 75    return self.func("ARRAY_SORT", expression.this, comparator)
 76
 77
 78def _schema_sql(self: generator.Generator, expression: exp.Schema) -> str:
 79    if isinstance(expression.parent, exp.Property):
 80        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
 81        return f"ARRAY[{columns}]"
 82
 83    if expression.parent:
 84        for schema in expression.parent.find_all(exp.Schema):
 85            if isinstance(schema.parent, exp.Property):
 86                expression = expression.copy()
 87                expression.expressions.extend(schema.expressions)
 88
 89    return self.schema_sql(expression)
 90
 91
 92def _quantile_sql(self: generator.Generator, expression: exp.Quantile) -> str:
 93    self.unsupported("Presto does not support exact quantiles")
 94    return f"APPROX_PERCENTILE({self.sql(expression, 'this')}, {self.sql(expression, 'quantile')})"
 95
 96
 97def _str_to_time_sql(
 98    self: generator.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
 99) -> str:
100    return f"DATE_PARSE({self.sql(expression, 'this')}, {self.format_time(expression)})"
101
102
103def _ts_or_ds_to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
104    time_format = self.format_time(expression)
105    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
106        return exp.cast(_str_to_time_sql(self, expression), "DATE").sql(dialect="presto")
107    return exp.cast(exp.cast(expression.this, "TIMESTAMP"), "DATE").sql(dialect="presto")
108
109
110def _ts_or_ds_add_sql(self: generator.Generator, expression: exp.TsOrDsAdd) -> str:
111    this = expression.this
112
113    if not isinstance(this, exp.CurrentDate):
114        this = exp.cast(exp.cast(expression.this, "TIMESTAMP"), "DATE")
115
116    return self.func(
117        "DATE_ADD",
118        exp.Literal.string(expression.text("unit") or "day"),
119        expression.expression,
120        this,
121    )
122
123
124def _ensure_utf8(charset: exp.Literal) -> None:
125    if charset.name.lower() != "utf-8":
126        raise UnsupportedError(f"Unsupported charset {charset}")
127
128
129def _approx_percentile(args: t.List) -> exp.Expression:
130    if len(args) == 4:
131        return exp.ApproxQuantile(
132            this=seq_get(args, 0),
133            weight=seq_get(args, 1),
134            quantile=seq_get(args, 2),
135            accuracy=seq_get(args, 3),
136        )
137    if len(args) == 3:
138        return exp.ApproxQuantile(
139            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
140        )
141    return exp.ApproxQuantile.from_arg_list(args)
142
143
144def _from_unixtime(args: t.List) -> exp.Expression:
145    if len(args) == 3:
146        return exp.UnixToTime(
147            this=seq_get(args, 0),
148            hours=seq_get(args, 1),
149            minutes=seq_get(args, 2),
150        )
151    if len(args) == 2:
152        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))
153
154    return exp.UnixToTime.from_arg_list(args)
155
156
157def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
158    if isinstance(expression, exp.Table):
159        if isinstance(expression.this, exp.GenerateSeries):
160            unnest = exp.Unnest(expressions=[expression.this])
161
162            if expression.alias:
163                return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False)
164            return unnest
165    return expression
166
167
168class Presto(Dialect):
169    INDEX_OFFSET = 1
170    NULL_ORDERING = "nulls_are_last"
171    TIME_FORMAT = MySQL.TIME_FORMAT
172    TIME_MAPPING = MySQL.TIME_MAPPING
173    STRICT_STRING_CONCAT = True
174
175    # https://github.com/trinodb/trino/issues/17
176    # https://github.com/trinodb/trino/issues/12289
177    # https://github.com/prestodb/presto/issues/2863
178    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
179
180    class Tokenizer(tokens.Tokenizer):
181        KEYWORDS = {
182            **tokens.Tokenizer.KEYWORDS,
183            "START": TokenType.BEGIN,
184            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
185            "ROW": TokenType.STRUCT,
186        }
187
188    class Parser(parser.Parser):
189        FUNCTIONS = {
190            **parser.Parser.FUNCTIONS,
191            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
192            "APPROX_PERCENTILE": _approx_percentile,
193            "CARDINALITY": exp.ArraySize.from_arg_list,
194            "CONTAINS": exp.ArrayContains.from_arg_list,
195            "DATE_ADD": lambda args: exp.DateAdd(
196                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
197            ),
198            "DATE_DIFF": lambda args: exp.DateDiff(
199                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
200            ),
201            "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
202            "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
203            "DATE_TRUNC": date_trunc_to_time,
204            "FROM_HEX": exp.Unhex.from_arg_list,
205            "FROM_UNIXTIME": _from_unixtime,
206            "FROM_UTF8": lambda args: exp.Decode(
207                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
208            ),
209            "NOW": exp.CurrentTimestamp.from_arg_list,
210            "SEQUENCE": exp.GenerateSeries.from_arg_list,
211            "STRPOS": lambda args: exp.StrPosition(
212                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
213            ),
214            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
215            "TO_HEX": exp.Hex.from_arg_list,
216            "TO_UTF8": lambda args: exp.Encode(
217                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
218            ),
219        }
220        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
221        FUNCTION_PARSERS.pop("TRIM")
222
223    class Generator(generator.Generator):
224        INTERVAL_ALLOWS_PLURAL_FORM = False
225        JOIN_HINTS = False
226        TABLE_HINTS = False
227        IS_BOOL_ALLOWED = False
228        STRUCT_DELIMITER = ("(", ")")
229
230        PROPERTIES_LOCATION = {
231            **generator.Generator.PROPERTIES_LOCATION,
232            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
233            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
234        }
235
236        TYPE_MAPPING = {
237            **generator.Generator.TYPE_MAPPING,
238            exp.DataType.Type.INT: "INTEGER",
239            exp.DataType.Type.FLOAT: "REAL",
240            exp.DataType.Type.BINARY: "VARBINARY",
241            exp.DataType.Type.TEXT: "VARCHAR",
242            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
243            exp.DataType.Type.STRUCT: "ROW",
244        }
245
246        TRANSFORMS = {
247            **generator.Generator.TRANSFORMS,
248            exp.ApproxDistinct: _approx_distinct_sql,
249            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
250            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
251            exp.ArrayConcat: rename_func("CONCAT"),
252            exp.ArrayContains: rename_func("CONTAINS"),
253            exp.ArraySize: rename_func("CARDINALITY"),
254            exp.BitwiseAnd: lambda self, e: f"BITWISE_AND({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
255            exp.BitwiseLeftShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_LEFT({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
256            exp.BitwiseNot: lambda self, e: f"BITWISE_NOT({self.sql(e, 'this')})",
257            exp.BitwiseOr: lambda self, e: f"BITWISE_OR({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
258            exp.BitwiseRightShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_RIGHT({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
259            exp.BitwiseXor: lambda self, e: f"BITWISE_XOR({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
260            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
261            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
262            exp.DataType: _datatype_sql,
263            exp.DateAdd: lambda self, e: self.func(
264                "DATE_ADD", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this
265            ),
266            exp.DateDiff: lambda self, e: self.func(
267                "DATE_DIFF", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this
268            ),
269            exp.DateStrToDate: lambda self, e: f"CAST(DATE_PARSE({self.sql(e, 'this')}, {Presto.DATE_FORMAT}) AS DATE)",
270            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
271            exp.Decode: _decode_sql,
272            exp.DiToDate: lambda self, e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
273            exp.Encode: _encode_sql,
274            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
275            exp.Group: transforms.preprocess([transforms.unalias_group]),
276            exp.Hex: rename_func("TO_HEX"),
277            exp.If: if_sql,
278            exp.ILike: no_ilike_sql,
279            exp.Initcap: _initcap_sql,
280            exp.Lateral: _explode_to_unnest_sql,
281            exp.Left: left_to_substring_sql,
282            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
283            exp.LogicalAnd: rename_func("BOOL_AND"),
284            exp.LogicalOr: rename_func("BOOL_OR"),
285            exp.Pivot: no_pivot_sql,
286            exp.Quantile: _quantile_sql,
287            exp.Right: right_to_substring_sql,
288            exp.SafeDivide: no_safe_divide_sql,
289            exp.Schema: _schema_sql,
290            exp.Select: transforms.preprocess(
291                [
292                    transforms.eliminate_qualify,
293                    transforms.eliminate_distinct_on,
294                    transforms.explode_to_unnest,
295                ]
296            ),
297            exp.SortArray: _no_sort_array,
298            exp.StrPosition: rename_func("STRPOS"),
299            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
300            exp.StrToTime: _str_to_time_sql,
301            exp.StrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {self.format_time(e)}))",
302            exp.StructExtract: struct_extract_sql,
303            exp.Table: transforms.preprocess([_unnest_sequence]),
304            exp.TimestampTrunc: timestamptrunc_sql,
305            exp.TimeStrToDate: timestrtotime_sql,
306            exp.TimeStrToTime: timestrtotime_sql,
307            exp.TimeStrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.TIME_FORMAT}))",
308            exp.TimeToStr: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})",
309            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
310            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
311            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
312            exp.TsOrDsAdd: _ts_or_ds_add_sql,
313            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
314            exp.Unhex: rename_func("FROM_HEX"),
315            exp.UnixToStr: lambda self, e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
316            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
317            exp.UnixToTimeStr: lambda self, e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
318            exp.VariancePop: rename_func("VAR_POP"),
319            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
320            exp.WithinGroup: transforms.preprocess(
321                [transforms.remove_within_group_for_percentiles]
322            ),
323        }
324
325        def interval_sql(self, expression: exp.Interval) -> str:
326            unit = self.sql(expression, "unit")
327            if expression.this and unit.lower().startswith("week"):
328                return f"({expression.this.name} * INTERVAL '7' day)"
329            return super().interval_sql(expression)
330
331        def transaction_sql(self, expression: exp.Transaction) -> str:
332            modes = expression.args.get("modes")
333            modes = f" {', '.join(modes)}" if modes else ""
334            return f"START TRANSACTION{modes}"
335
336        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
337            start = expression.args["start"]
338            end = expression.args["end"]
339            step = expression.args.get("step")
340
341            if isinstance(start, exp.Cast):
342                target_type = start.to
343            elif isinstance(end, exp.Cast):
344                target_type = end.to
345            else:
346                target_type = None
347
348            if target_type and target_type.is_type("timestamp"):
349                to = target_type.copy()
350
351                if target_type is start.to:
352                    end = exp.cast(end, to)
353                else:
354                    start = exp.cast(start, to)
355
356            return self.func("SEQUENCE", start, end, step)
357
358        def offset_limit_modifiers(
359            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
360        ) -> t.List[str]:
361            return [
362                self.sql(expression, "offset"),
363                self.sql(limit),
364            ]
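The helpers above mostly reorder or rename arguments between Presto's function signatures and sqlglot's canonical expressions. A minimal sketch of how this surfaces through the public API (outputs in comments are indicative and may vary slightly across sqlglot versions):

    import sqlglot
    from sqlglot import exp

    # Presto's DATE_ADD(unit, value, date) is parsed with its arguments reordered
    # into exp.DateAdd (see the "DATE_ADD" lambda in Parser.FUNCTIONS below).
    ast = sqlglot.parse_one("SELECT DATE_ADD('day', 1, d) FROM t", read="presto")
    date_add = ast.find(exp.DateAdd)
    print(date_add.this)          # d -- Presto's third argument becomes `this`
    print(date_add.text("unit"))  # day

    # Round-tripping back to Presto re-emits the unit literal first.
    print(ast.sql(dialect="presto"))  # SELECT DATE_ADD('day', 1, d) FROM t

    # _encode_sql/_decode_sql only accept UTF-8; any other charset raises UnsupportedError.
    print(sqlglot.parse_one("TO_UTF8(x)", read="presto").sql(dialect="presto"))  # TO_UTF8(x)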
class Presto(sqlglot.dialects.dialect.Dialect):
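Presto shares its time-format mapping with MySQL (TIME_MAPPING = MySQL.TIME_MAPPING), so MySQL-style format strings translate directly into Presto's DATE_FORMAT/DATE_PARSE. A small hedged example (output indicative):

    import sqlglot

    # The '%Y-%m-%d' format survives the round trip because both dialects
    # use the same time-format tokens.
    print(sqlglot.transpile("SELECT DATE_FORMAT(d, '%Y-%m-%d')", read="mysql", write="presto")[0])
    # SELECT DATE_FORMAT(d, '%Y-%m-%d')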
class Presto.Tokenizer(sqlglot.tokens.Tokenizer):
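The keyword overrides make the tokenizer treat START as the beginning of a transaction and ROW as a struct type keyword. A quick way to observe this, assuming direct use of the dialect's tokenizer (output indicative):

    from sqlglot.dialects.presto import Presto
    from sqlglot.tokens import TokenType

    tokens = Presto.Tokenizer().tokenize("START TRANSACTION")
    print(tokens[0].token_type)  # TokenType.BEGIN -- "START" is remapped for transaction parsing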
class Presto.Parser(sqlglot.parser.Parser):

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
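Several FUNCTIONS entries are variadic and map extra arguments onto optional fields of the resulting expression; a minimal sketch (outputs indicative):

    import sqlglot
    from sqlglot import exp

    # APPROX_PERCENTILE(x, 0.5, 0.01) parses to exp.ApproxQuantile with an
    # accuracy argument (see _approx_percentile above); the 2-argument form omits it.
    q = sqlglot.parse_one("SELECT APPROX_PERCENTILE(x, 0.5, 0.01) FROM t", read="presto").find(exp.ApproxQuantile)
    print(q.args.get("accuracy"))  # 0.01

    # FROM_UNIXTIME(ts, tz) parses to exp.UnixToTime with a time zone argument.
    u = sqlglot.parse_one("SELECT FROM_UNIXTIME(ts, 'UTC')", read="presto").find(exp.UnixToTime)
    print(u.args.get("zone"))  # 'UTC'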
class Presto.Generator(sqlglot.generator.Generator):

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether or not to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether or not to normalize identifiers to lowercase. Default: False.
  • pad: Determines the pad size in a formatted string. Default: 2.
  • indent: Determines the indentation size in a formatted string. Default: 2.
  • normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
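The options above are passed as keyword arguments when generating SQL, for example through sqlglot.transpile. A minimal sketch (output indicative):

    import sqlglot

    sql = "SELECT CARDINALITY(arr) AS n FROM tbl WHERE x > 1"
    # pretty (and identify, normalize, ...) map directly onto the Generator arguments listed above.
    print(sqlglot.transpile(sql, read="presto", write="presto", pretty=True)[0])
    # SELECT
    #   CARDINALITY(arr) AS n
    # FROM tbl
    # WHERE
    #   x > 1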
def interval_sql(self, expression: sqlglot.expressions.Interval) -> str:
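interval_sql rewrites week intervals as a multiple of seven days, since Presto has no WEEK interval unit. Hedged example (output indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT d + INTERVAL '2' week", read="presto", write="presto")[0])
    # SELECT d + (2 * INTERVAL '7' day)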
def transaction_sql(self, expression: sqlglot.expressions.Transaction) -> str:
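transaction_sql renders exp.Transaction using Presto's START TRANSACTION spelling, so a bare BEGIN read from another dialect comes out in Presto form. Hedged example (output indicative):

    import sqlglot

    print(sqlglot.transpile("BEGIN", read="duckdb", write="presto")[0])  # START TRANSACTION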
def generateseries_sql(self, expression: sqlglot.expressions.GenerateSeries) -> str:
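generateseries_sql renders exp.GenerateSeries as Presto's SEQUENCE function and, when one bound is cast to a timestamp, casts the other bound to match. Hedged example for the simple case (output indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT GENERATE_SERIES(1, 5)", write="presto")[0])
    # SELECT SEQUENCE(1, 5)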
def offset_limit_modifiers( self, expression: sqlglot.expressions.Expression, fetch: bool, limit: Union[sqlglot.expressions.Fetch, sqlglot.expressions.Limit, NoneType]) -> List[str]:
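offset_limit_modifiers flips the base generator's LIMIT/OFFSET order because Presto expects OFFSET before LIMIT. Hedged example (output indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT x FROM t LIMIT 10 OFFSET 5", read="duckdb", write="presto")[0])
    # SELECT x FROM t OFFSET 5 LIMIT 10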
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
247    @classmethod
248    def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
249        """Checks if text can be identified given an identify option.
250
251        Args:
252            text: The text to check.
253            identify:
254                "always" or `True`: Always returns true.
255                "safe": True if the identifier is case-insensitive.
256
257        Returns:
258            Whether or not the given text can be identified.
259        """
260        if identify is True or identify == "always":
261            return True
262
263        if identify == "safe":
264            return not cls.case_sensitive(text)
265
266        return False

Checks if text can be identified given an identify option.

Arguments:
  • text: The text to check.
  • identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:

Whether or not the given text can be identified.
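A hedged sketch, assuming the classmethod is reachable on the generator class exactly as documented above:

    from sqlglot.dialects.presto import Presto

    # "always" (or True) always permits identification; values other than
    # "always"/True/"safe" never do. "safe" defers to the dialect's
    # case-sensitivity rules via case_sensitive.
    print(Presto.Generator.can_identify("MyCol", "always"))  # True
    print(Presto.Generator.can_identify("MyCol", False))     # False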

Inherited Members
sqlglot.generator.Generator
Generator
generate
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columnposition_sql
columndef_sql
columnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
createable_sql
create_sql
clone_sql
describe_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
bytestring_sql
rawstring_sql
datatypesize_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
except_op
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
lockingproperty_sql
withdataproperty_sql
insert_sql
intersect_sql
intersect_op
introducer_sql
pseudotype_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
join_sql
lambda_sql
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
order_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
after_having_modifiers
after_limit_modifiers
select_sql
schema_sql
schema_columns_sql
star_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
windowspec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
nextvaluefor_sql
extract_sql
trim_sql
safeconcat_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonobject_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
aliases_sql
attimezone_sql
add_sql
and_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
altertable_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
intdiv_sql
dpipe_sql
safedpipe_sql
div_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
ilikeany_sql
is_sql
like_sql
likeany_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql
tochar_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
oncluster_sql