Edit on GitHub

sqlglot.dialects.snowflake

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens
  6from sqlglot.dialects.dialect import (
  7    Dialect,
  8    datestrtodate_sql,
  9    format_time_lambda,
 10    inline_array_sql,
 11    min_or_least,
 12    rename_func,
 13    timestrtotime_sql,
 14    ts_or_ds_to_date_sql,
 15    var_map_sql,
 16)
 17from sqlglot.expressions import Literal
 18from sqlglot.helper import flatten, seq_get
 19from sqlglot.tokens import TokenType
 20
 21
 22def _check_int(s):
 23    if s[0] in ("-", "+"):
 24        return s[1:].isdigit()
 25    return s.isdigit()
 26
 27
 28# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
 29def _snowflake_to_timestamp(args):
 30    if len(args) == 2:
 31        first_arg, second_arg = args
 32        if second_arg.is_string:
 33            # case: <string_expr> [ , <format> ]
 34            return format_time_lambda(exp.StrToTime, "snowflake")(args)
 35
 36        # case: <numeric_expr> [ , <scale> ]
 37        if second_arg.name not in ["0", "3", "9"]:
 38            raise ValueError(
 39                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
 40            )
 41
 42        if second_arg.name == "0":
 43            timescale = exp.UnixToTime.SECONDS
 44        elif second_arg.name == "3":
 45            timescale = exp.UnixToTime.MILLIS
 46        elif second_arg.name == "9":
 47            timescale = exp.UnixToTime.MICROS
 48
 49        return exp.UnixToTime(this=first_arg, scale=timescale)
 50
 51    first_arg = seq_get(args, 0)
 52    if not isinstance(first_arg, Literal):
 53        # case: <variant_expr>
 54        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 55
 56    if first_arg.is_string:
 57        if _check_int(first_arg.this):
 58            # case: <integer>
 59            return exp.UnixToTime.from_arg_list(args)
 60
 61        # case: <date_expr>
 62        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 63
 64    # case: <numeric_expr>
 65    return exp.UnixToTime.from_arg_list(args)
 66
 67
 68def _unix_to_time_sql(self, expression):
 69    scale = expression.args.get("scale")
 70    timestamp = self.sql(expression, "this")
 71    if scale in [None, exp.UnixToTime.SECONDS]:
 72        return f"TO_TIMESTAMP({timestamp})"
 73    if scale == exp.UnixToTime.MILLIS:
 74        return f"TO_TIMESTAMP({timestamp}, 3)"
 75    if scale == exp.UnixToTime.MICROS:
 76        return f"TO_TIMESTAMP({timestamp}, 9)"
 77
 78    raise ValueError("Improper scale for timestamp")
 79
 80
 81# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
 82# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
 83def _parse_date_part(self):
 84    this = self._parse_var() or self._parse_type()
 85    self._match(TokenType.COMMA)
 86    expression = self._parse_bitwise()
 87
 88    name = this.name.upper()
 89    if name.startswith("EPOCH"):
 90        if name.startswith("EPOCH_MILLISECOND"):
 91            scale = 10**3
 92        elif name.startswith("EPOCH_MICROSECOND"):
 93            scale = 10**6
 94        elif name.startswith("EPOCH_NANOSECOND"):
 95            scale = 10**9
 96        else:
 97            scale = None
 98
 99        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
100        to_unix = self.expression(exp.TimeToUnix, this=ts)
101
102        if scale:
103            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
104
105        return to_unix
106
107    return self.expression(exp.Extract, this=this, expression=expression)
108
109
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args):
    """Translate ``DIV0(a, b)`` into ``IF(b = 0, 0, a / b)``."""
    dividend, divisor = seq_get(args, 0), seq_get(args, 1)
    return exp.If(
        this=exp.EQ(this=divisor, expression=exp.Literal.number(0)),
        true=exp.Literal.number(0),
        false=exp.FloatDiv(this=dividend, expression=divisor),
    )
116
117
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args):
    """Translate ``ZEROIFNULL(x)`` into ``IF(x IS NULL, 0, x)``."""
    operand = seq_get(args, 0)
    is_null = exp.Is(this=operand, expression=exp.Null())
    return exp.If(this=is_null, true=exp.Literal.number(0), false=operand)
122
123
# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args):
    """Translate ``NULLIFZERO(x)`` into ``IF(x = 0, NULL, x)``."""
    operand = seq_get(args, 0)
    is_zero = exp.EQ(this=operand, expression=exp.Literal.number(0))
    return exp.If(this=is_zero, true=exp.Null(), false=operand)
128
129
def _datatype_sql(self, expression):
    """Render a data type, emitting bare ``ARRAY`` for arrays and ``OBJECT`` for maps.

    Every other type is delegated to the generator's ``datatype_sql``.
    """
    kind = expression.this
    if kind == exp.DataType.Type.ARRAY:
        return "ARRAY"
    if kind == exp.DataType.Type.MAP:
        return "OBJECT"
    return self.datatype_sql(expression)
136
137
class Snowflake(Dialect):
    """Snowflake SQL dialect.

    Bundles the Snowflake-specific ``Parser``, ``Tokenizer`` and ``Generator`` overrides
    together with the dialect-level null ordering and time-format settings.
    """

    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    # Snowflake format elements mapped to strftime-style directives.
    # NOTE(review): "DY"/"dy" map to "%w", which in strftime is the weekday number, while
    # Snowflake documents DY as the abbreviated day name -- confirm this is intended.
    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%d",
        "d": "%-d",
        "DY": "%w",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        """Parser overrides for Snowflake-specific functions, operators and ALTER clauses."""

        # Function name -> builder taking the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            # Argument order is (unit, amount, date)
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            # Argument order is (unit, value); the unit is stored as a string literal
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
                this=seq_get(args, 1),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "DECODE": exp.Matches.from_arg_list,
            "OBJECT_CONSTRUCT": parser.parse_var_map,
            "ZEROIFNULL": _zeroifnull_to_if,
            "NULLIFZERO": _nullifzero_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited TRIM entry (the dict above is a copy, so the base is untouched).
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
            # `this:path` is represented as a Bracket over `this` with `path` as its expression
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,  # type: ignore
            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
            ),
            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
            ),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,  # type: ignore
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        INTEGER_DIVISION = False

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the comma-separated items following ``SET``/``UNSET [TAG]`` into ``exp.SetTag``.

            Args:
                unset: when True, items are parsed with ``_parse_id_var``; otherwise with
                    ``_parse_conjunction``.
            """
            self._match_text_seq("TAG")
            # Named so it does not shadow the imported `parser` module.
            parse_item = t.cast(
                t.Callable, self._parse_id_var if unset else self._parse_conjunction
            )
            return self.expression(
                exp.SetTag, expressions=self._parse_csv(parse_item), unset=unset
            )

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer overrides: quote delimiters, escape characters, keywords and ``$``."""

        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]

        # EXCLUDE and MINUS tokenize as EXCEPT, RENAME as REPLACE, SAMPLE as TABLE_SAMPLE.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "MINUS": TokenType.EXCEPT,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Generator(generator.Generator):
        """Generator overrides that render expressions using Snowflake syntax."""

        PARAMETER_TOKEN = "$"
        INTEGER_DIVISION = False
        MATCHED_BY_SOURCE = False

        # Expression type -> callable(generator, expression) producing SQL.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.If: rename_func("IFF"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Matches: rename_func("DECODE"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Min: min_or_least,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
            """Render ``exp.ILikeAny`` as a binary ``ILIKE ANY`` expression."""
            return self.binary(expression, "ILIKE ANY")

        def likeany_sql(self, expression: exp.LikeAny) -> str:
            """Render ``exp.LikeAny`` as a binary ``LIKE ANY`` expression."""
            return self.binary(expression, "LIKE ANY")

        def except_op(self, expression):
            """Render the EXCEPT operator, flagging the non-distinct form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression):
            """Render the INTERSECT operator, flagging the non-distinct form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def values_sql(self, expression: exp.Values) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
            from adding quotes to the column by using the `identify` argument when generating the SQL.
            """
            alias = expression.args.get("alias")
            if alias and alias.args.get("columns"):
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier)
                    and isinstance(node.parent, exp.TableAlias)
                    and node.arg_key == "columns"
                    else node,
                )
                # Bind the parent implementation before building the lambda: zero-argument
                # super() cannot be used inside the argument-less lambda, and
                # super(self.__class__, self) recurses forever once this class is subclassed.
                parent_values_sql = super().values_sql
                return self.no_identify(lambda: parent_values_sql(expression))
            return super().values_sql(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            """Render ``exp.SetTag`` as ``SET TAG ...`` or ``UNSET TAG ...``."""
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def select_sql(self, expression: exp.Select) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
            generating the SQL.

            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
            expression. This might not be true in a case where the same column name can be sourced from another table that can
            properly quote but should be true in most cases.
            """
            values_identifiers = set(
                flatten(
                    (v.args.get("alias") or exp.Alias()).args.get("columns", [])
                    for v in expression.find_all(exp.Values)
                )
            )
            if values_identifiers:
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier) and node in values_identifiers
                    else node,
                )
                # See values_sql: bind via super() here so subclasses do not recurse.
                parent_select_sql = super().select_sql
                return self.no_identify(lambda: parent_select_sql(expression))
            return super().select_sql(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render ``DESCRIBE <kind> <this>``, using ``TABLE`` when no kind is set."""
            # Default to table if kind is unknown
            kind = expression.args.get("kind") or "TABLE"
            this = self.sql(expression, "this")
            return f"DESCRIBE {kind} {this}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity constraint as ``AUTOINCREMENT [START n] [INCREMENT n]``."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Snowflake(sqlglot.dialects.dialect.Dialect):
class Snowflake(Dialect):
    """Snowflake SQL dialect.

    Bundles the Snowflake-specific ``Parser``, ``Tokenizer`` and ``Generator`` overrides
    together with the dialect-level null ordering and time-format settings.
    """

    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    # Snowflake format elements mapped to strftime-style directives.
    # NOTE(review): "DY"/"dy" map to "%w", which in strftime is the weekday number, while
    # Snowflake documents DY as the abbreviated day name -- confirm this is intended.
    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%d",
        "d": "%-d",
        "DY": "%w",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        """Parser overrides for Snowflake-specific functions, operators and ALTER clauses."""

        # Function name -> builder taking the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            # Argument order is (unit, amount, date)
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            # Argument order is (unit, value); the unit is stored as a string literal
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
                this=seq_get(args, 1),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "DECODE": exp.Matches.from_arg_list,
            "OBJECT_CONSTRUCT": parser.parse_var_map,
            "ZEROIFNULL": _zeroifnull_to_if,
            "NULLIFZERO": _nullifzero_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited TRIM entry (the dict above is a copy, so the base is untouched).
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
            # `this:path` is represented as a Bracket over `this` with `path` as its expression
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,  # type: ignore
            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
            ),
            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
            ),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,  # type: ignore
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        INTEGER_DIVISION = False

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the comma-separated items following ``SET``/``UNSET [TAG]`` into ``exp.SetTag``.

            Args:
                unset: when True, items are parsed with ``_parse_id_var``; otherwise with
                    ``_parse_conjunction``.
            """
            self._match_text_seq("TAG")
            # Named so it does not shadow the imported `parser` module.
            parse_item = t.cast(
                t.Callable, self._parse_id_var if unset else self._parse_conjunction
            )
            return self.expression(
                exp.SetTag, expressions=self._parse_csv(parse_item), unset=unset
            )

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer overrides: quote delimiters, escape characters, keywords and ``$``."""

        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]

        # EXCLUDE and MINUS tokenize as EXCEPT, RENAME as REPLACE, SAMPLE as TABLE_SAMPLE.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "MINUS": TokenType.EXCEPT,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Generator(generator.Generator):
        """Generator overrides that render expressions using Snowflake syntax."""

        PARAMETER_TOKEN = "$"
        INTEGER_DIVISION = False
        MATCHED_BY_SOURCE = False

        # Expression type -> callable(generator, expression) producing SQL.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.If: rename_func("IFF"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Matches: rename_func("DECODE"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Min: min_or_least,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
            """Render ``exp.ILikeAny`` as a binary ``ILIKE ANY`` expression."""
            return self.binary(expression, "ILIKE ANY")

        def likeany_sql(self, expression: exp.LikeAny) -> str:
            """Render ``exp.LikeAny`` as a binary ``LIKE ANY`` expression."""
            return self.binary(expression, "LIKE ANY")

        def except_op(self, expression):
            """Render the EXCEPT operator, flagging the non-distinct form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression):
            """Render the INTERSECT operator, flagging the non-distinct form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def values_sql(self, expression: exp.Values) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
            from adding quotes to the column by using the `identify` argument when generating the SQL.
            """
            alias = expression.args.get("alias")
            if alias and alias.args.get("columns"):
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier)
                    and isinstance(node.parent, exp.TableAlias)
                    and node.arg_key == "columns"
                    else node,
                )
                # Bind the parent implementation before building the lambda: zero-argument
                # super() cannot be used inside the argument-less lambda, and
                # super(self.__class__, self) recurses forever once this class is subclassed.
                parent_values_sql = super().values_sql
                return self.no_identify(lambda: parent_values_sql(expression))
            return super().values_sql(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            """Render ``exp.SetTag`` as ``SET TAG ...`` or ``UNSET TAG ...``."""
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def select_sql(self, expression: exp.Select) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
            generating the SQL.

            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
            expression. This might not be true in a case where the same column name can be sourced from another table that can
            properly quote but should be true in most cases.
            """
            values_identifiers = set(
                flatten(
                    (v.args.get("alias") or exp.Alias()).args.get("columns", [])
                    for v in expression.find_all(exp.Values)
                )
            )
            if values_identifiers:
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier) and node in values_identifiers
                    else node,
                )
                # See values_sql: bind via super() here so subclasses do not recurse.
                parent_select_sql = super().select_sql
                return self.no_identify(lambda: parent_select_sql(expression))
            return super().select_sql(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render ``DESCRIBE <kind> <this>``, using ``TABLE`` when no kind is set."""
            # Default to table if kind is unknown
            kind = expression.args.get("kind") or "TABLE"
            this = self.sql(expression, "this")
            return f"DESCRIBE {kind} {this}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity constraint as ``AUTOINCREMENT [START n] [INCREMENT n]``."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Snowflake.Parser(sqlglot.parser.Parser):
    class Parser(parser.Parser):
        """Parser overrides for Snowflake-specific functions, operators and ALTER clauses."""

        # Function name -> builder taking the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            # Argument order is (unit, amount, date)
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            # Argument order is (unit, value); the unit is stored as a string literal
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
                this=seq_get(args, 1),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "DECODE": exp.Matches.from_arg_list,
            "OBJECT_CONSTRUCT": parser.parse_var_map,
            "ZEROIFNULL": _zeroifnull_to_if,
            "NULLIFZERO": _nullifzero_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited TRIM entry (the dict above is a copy, so the base is untouched).
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
            # `this:path` is represented as a Bracket over `this` with `path` as its expression
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,  # type: ignore
            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
            ),
            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
            ),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,  # type: ignore
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        INTEGER_DIVISION = False

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the comma-separated items following ``SET``/``UNSET [TAG]`` into ``exp.SetTag``.

            Args:
                unset: when True, items are parsed with ``_parse_id_var``; otherwise with
                    ``_parse_conjunction``.
            """
            self._match_text_seq("TAG")
            # NOTE: this local shadows the imported `parser` module for the rest of the method.
            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
class Snowflake.Tokenizer(sqlglot.tokens.Tokenizer):
    class Tokenizer(tokens.Tokenizer):
        """Tokenizer overrides: quote delimiters, escape characters, keywords and ``$``."""

        # String delimiters: single quote and $$
        QUOTES = ["'", "$$"]
        # Characters treated as escapes inside strings: backslash and single quote
        STRING_ESCAPES = ["\\", "'"]

        # EXCLUDE and MINUS tokenize as EXCEPT, RENAME as REPLACE, SAMPLE as TABLE_SAMPLE;
        # TIMESTAMP_NTZ and TIMESTAMPNTZ both tokenize as TIMESTAMP.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "MINUS": TokenType.EXCEPT,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        # `$` is tokenized as a parameter marker
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }
class Snowflake.Generator(sqlglot.generator.Generator):
268    class Generator(generator.Generator):
269        PARAMETER_TOKEN = "$"
270        INTEGER_DIVISION = False
271        MATCHED_BY_SOURCE = False
272
273        TRANSFORMS = {
274            **generator.Generator.TRANSFORMS,  # type: ignore
275            exp.Array: inline_array_sql,
276            exp.ArrayConcat: rename_func("ARRAY_CAT"),
277            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
278            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
279            exp.DateStrToDate: datestrtodate_sql,
280            exp.DataType: _datatype_sql,
281            exp.If: rename_func("IFF"),
282            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
283            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
284            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
285            exp.Matches: rename_func("DECODE"),
286            exp.StrPosition: lambda self, e: self.func(
287                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
288            ),
289            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
290            exp.TimeStrToTime: timestrtotime_sql,
291            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
292            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
293            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
294            exp.UnixToTime: _unix_to_time_sql,
295            exp.DayOfWeek: rename_func("DAYOFWEEK"),
296            exp.Min: min_or_least,
297        }
298
299        TYPE_MAPPING = {
300            **generator.Generator.TYPE_MAPPING,  # type: ignore
301            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
302        }
303
304        STAR_MAPPING = {
305            "except": "EXCLUDE",
306            "replace": "RENAME",
307        }
308
309        PROPERTIES_LOCATION = {
310            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
311            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
312        }
313
314        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
315            return self.binary(expression, "ILIKE ANY")
316
317        def likeany_sql(self, expression: exp.LikeAny) -> str:
318            return self.binary(expression, "LIKE ANY")
319
320        def except_op(self, expression):
321            if not expression.args.get("distinct", False):
322                self.unsupported("EXCEPT with All is not supported in Snowflake")
323            return super().except_op(expression)
324
325        def intersect_op(self, expression):
326            if not expression.args.get("distinct", False):
327                self.unsupported("INTERSECT with All is not supported in Snowflake")
328            return super().intersect_op(expression)
329
330        def values_sql(self, expression: exp.Values) -> str:
331            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.
332
333            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
334            from adding quotes to the column by using the `identify` argument when generating the SQL.
335            """
336            alias = expression.args.get("alias")
337            if alias and alias.args.get("columns"):
338                expression = expression.transform(
339                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
340                    if isinstance(node, exp.Identifier)
341                    and isinstance(node.parent, exp.TableAlias)
342                    and node.arg_key == "columns"
343                    else node,
344                )
345                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
346            return super().values_sql(expression)
347
348        def settag_sql(self, expression: exp.SetTag) -> str:
349            action = "UNSET" if expression.args.get("unset") else "SET"
350            return f"{action} TAG {self.expressions(expression)}"
351
352        def select_sql(self, expression: exp.Select) -> str:
353            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
354            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
355            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
356            generating the SQL.
357
358            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
359            expression. This might not be true in a case where the same column name can be sourced from another table that can
360            properly quote but should be true in most cases.
361            """
362            values_identifiers = set(
363                flatten(
364                    (v.args.get("alias") or exp.Alias()).args.get("columns", [])
365                    for v in expression.find_all(exp.Values)
366                )
367            )
368            if values_identifiers:
369                expression = expression.transform(
370                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
371                    if isinstance(node, exp.Identifier) and node in values_identifiers
372                    else node,
373                )
374                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
375            return super().select_sql(expression)
376
377        def describe_sql(self, expression: exp.Describe) -> str:
378            # Default to table if kind is unknown
379            kind_value = expression.args.get("kind") or "TABLE"
380            kind = f" {kind_value}" if kind_value else ""
381            this = f" {self.sql(expression, 'this')}"
382            return f"DESCRIBE{kind}{this}"
383
384        def generatedasidentitycolumnconstraint_sql(
385            self, expression: exp.GeneratedAsIdentityColumnConstraint
386        ) -> str:
387            start = expression.args.get("start")
388            start = f" START {start}" if start else ""
389            increment = expression.args.get("increment")
390            increment = f" INCREMENT {increment}" if increment else ""
391            return f"AUTOINCREMENT{start}{increment}"

Generator interprets the given syntax tree and produces a SQL string as an output.

Arguments:
  • time_mapping (dict): the dictionary of custom time mappings in which the key represents a Python time format and the value the target time format
  • time_trie (trie): a trie of the time_mapping keys
  • pretty (bool): if set to True the returned string will be formatted. Default: False.
  • quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
  • quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
  • identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
  • identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
  • identify (bool): if set to True all identifiers will be delimited by the corresponding character.
  • normalize (bool): if set to True all identifiers will be lowercased
  • string_escape (str): specifies a string escape character. Default: '.
  • identifier_escape (str): specifies an identifier escape character. Default: ".
  • pad (int): determines padding in a formatted string. Default: 2.
  • indent (int): determines the size of indentation in a formatted string. Default: 4.
  • unnest_column_only (bool): if true, unnest table aliases are considered only as column aliases
  • normalize_functions (str): how to normalize function names: "upper", "lower", or None. Default: "upper"
  • alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
  • unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
  • max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma (bool): whether the comma is leading or trailing in select statements. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
def ilikeany_sql(self, expression: sqlglot.expressions.ILikeAny) -> str:
314        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
315            return self.binary(expression, "ILIKE ANY")
def likeany_sql(self, expression: sqlglot.expressions.LikeAny) -> str:
317        def likeany_sql(self, expression: exp.LikeAny) -> str:
318            return self.binary(expression, "LIKE ANY")
def except_op(self, expression):
320        def except_op(self, expression):
321            if not expression.args.get("distinct", False):
322                self.unsupported("EXCEPT with All is not supported in Snowflake")
323            return super().except_op(expression)
def intersect_op(self, expression):
325        def intersect_op(self, expression):
326            if not expression.args.get("distinct", False):
327                self.unsupported("INTERSECT with All is not supported in Snowflake")
328            return super().intersect_op(expression)
def values_sql(self, expression: sqlglot.expressions.Values) -> str:
330        def values_sql(self, expression: exp.Values) -> str:
331            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.
332
333            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
334            from adding quotes to the column by using the `identify` argument when generating the SQL.
335            """
336            alias = expression.args.get("alias")
337            if alias and alias.args.get("columns"):
338                expression = expression.transform(
339                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
340                    if isinstance(node, exp.Identifier)
341                    and isinstance(node.parent, exp.TableAlias)
342                    and node.arg_key == "columns"
343                    else node,
344                )
345                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
346            return super().values_sql(expression)

Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

We also want to make sure that after we find matches where we need to unquote a column that we prevent users from adding quotes to the column by using the identify argument when generating the SQL.

def settag_sql(self, expression: sqlglot.expressions.SetTag) -> str:
348        def settag_sql(self, expression: exp.SetTag) -> str:
349            action = "UNSET" if expression.args.get("unset") else "SET"
350            return f"{action} TAG {self.expressions(expression)}"
def select_sql(self, expression: sqlglot.expressions.Select) -> str:
352        def select_sql(self, expression: exp.Select) -> str:
353            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
354            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
355            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
356            generating the SQL.
357
358            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
359            expression. This might not be true in a case where the same column name can be sourced from another table that can
360            properly quote but should be true in most cases.
361            """
362            values_identifiers = set(
363                flatten(
364                    (v.args.get("alias") or exp.Alias()).args.get("columns", [])
365                    for v in expression.find_all(exp.Values)
366                )
367            )
368            if values_identifiers:
369                expression = expression.transform(
370                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
371                    if isinstance(node, exp.Identifier) and node in values_identifiers
372                    else node,
373                )
374                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
375            return super().select_sql(expression)

Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need to unquote a column that we prevent users from adding quotes to the column by using the identify argument when generating the SQL.

Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the expression. This might not be true in a case where the same column name can be sourced from another table that can properly quote but should be true in most cases.

def describe_sql(self, expression: sqlglot.expressions.Describe) -> str:
377        def describe_sql(self, expression: exp.Describe) -> str:
378            # Default to table if kind is unknown
379            kind_value = expression.args.get("kind") or "TABLE"
380            kind = f" {kind_value}" if kind_value else ""
381            this = f" {self.sql(expression, 'this')}"
382            return f"DESCRIBE{kind}{this}"
def generatedasidentitycolumnconstraint_sql( self, expression: sqlglot.expressions.GeneratedAsIdentityColumnConstraint) -> str:
384        def generatedasidentitycolumnconstraint_sql(
385            self, expression: exp.GeneratedAsIdentityColumnConstraint
386        ) -> str:
387            start = expression.args.get("start")
388            start = f" START {start}" if start else ""
389            increment = expression.args.get("increment")
390            increment = f" INCREMENT {increment}" if increment else ""
391            return f"AUTOINCREMENT{start}{increment}"
Inherited Members
sqlglot.generator.Generator
Generator
generate
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columndef_sql
columnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
create_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
afterjournalproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
lockingproperty_sql
withdataproperty_sql
insert_sql
intersect_sql
introducer_sql
pseudotype_sql
returning_sql
rowformatdelimitedproperty_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
var_sql
into_sql
from_sql
group_sql
having_sql
join_sql
lambda_sql
lateral_sql
limit_sql
offset_sql
lock_sql
literal_sql
loaddata_sql
null_sql
boolean_sql
order_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
schema_sql
star_sql
structkwarg_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
window_spec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
extract_sql
trim_sql
concat_sql
check_sql
foreignkey_sql
primarykey_sql
unique_sql
if_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
aliases_sql
attimezone_sql
add_sql
and_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
comment_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
altertable_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
intdiv_sql
dpipe_sql
div_sql
floatdiv_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
is_sql
like_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql