Edit on GitHub

sqlglot.dialects.snowflake

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens
  6from sqlglot.dialects.dialect import (
  7    Dialect,
  8    datestrtodate_sql,
  9    format_time_lambda,
 10    inline_array_sql,
 11    rename_func,
 12    timestrtotime_sql,
 13    ts_or_ds_to_date_sql,
 14    var_map_sql,
 15)
 16from sqlglot.expressions import Literal
 17from sqlglot.helper import flatten, seq_get
 18from sqlglot.tokens import TokenType
 19
 20
 21def _check_int(s):
 22    if s[0] in ("-", "+"):
 23        return s[1:].isdigit()
 24    return s.isdigit()
 25
 26
 27# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
 28def _snowflake_to_timestamp(args):
 29    if len(args) == 2:
 30        first_arg, second_arg = args
 31        if second_arg.is_string:
 32            # case: <string_expr> [ , <format> ]
 33            return format_time_lambda(exp.StrToTime, "snowflake")(args)
 34
 35        # case: <numeric_expr> [ , <scale> ]
 36        if second_arg.name not in ["0", "3", "9"]:
 37            raise ValueError(
 38                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
 39            )
 40
 41        if second_arg.name == "0":
 42            timescale = exp.UnixToTime.SECONDS
 43        elif second_arg.name == "3":
 44            timescale = exp.UnixToTime.MILLIS
 45        elif second_arg.name == "9":
 46            timescale = exp.UnixToTime.MICROS
 47
 48        return exp.UnixToTime(this=first_arg, scale=timescale)
 49
 50    first_arg = seq_get(args, 0)
 51    if not isinstance(first_arg, Literal):
 52        # case: <variant_expr>
 53        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 54
 55    if first_arg.is_string:
 56        if _check_int(first_arg.this):
 57            # case: <integer>
 58            return exp.UnixToTime.from_arg_list(args)
 59
 60        # case: <date_expr>
 61        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 62
 63    # case: <numeric_expr>
 64    return exp.UnixToTime.from_arg_list(args)
 65
 66
 67def _unix_to_time_sql(self, expression):
 68    scale = expression.args.get("scale")
 69    timestamp = self.sql(expression, "this")
 70    if scale in [None, exp.UnixToTime.SECONDS]:
 71        return f"TO_TIMESTAMP({timestamp})"
 72    if scale == exp.UnixToTime.MILLIS:
 73        return f"TO_TIMESTAMP({timestamp}, 3)"
 74    if scale == exp.UnixToTime.MICROS:
 75        return f"TO_TIMESTAMP({timestamp}, 9)"
 76
 77    raise ValueError("Improper scale for timestamp")
 78
 79
 80# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
 81# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
 82def _parse_date_part(self):
 83    this = self._parse_var() or self._parse_type()
 84    self._match(TokenType.COMMA)
 85    expression = self._parse_bitwise()
 86
 87    name = this.name.upper()
 88    if name.startswith("EPOCH"):
 89        if name.startswith("EPOCH_MILLISECOND"):
 90            scale = 10**3
 91        elif name.startswith("EPOCH_MICROSECOND"):
 92            scale = 10**6
 93        elif name.startswith("EPOCH_NANOSECOND"):
 94            scale = 10**9
 95        else:
 96            scale = None
 97
 98        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
 99        to_unix = self.expression(exp.TimeToUnix, this=ts)
100
101        if scale:
102            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
103
104        return to_unix
105
106    return self.expression(exp.Extract, this=this, expression=expression)
107
108
109# https://docs.snowflake.com/en/sql-reference/functions/div0
110def _div0_to_if(args):
111    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
112    true = exp.Literal.number(0)
113    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
114    return exp.If(this=cond, true=true, false=false)
115
116
117# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
118def _zeroifnull_to_if(args):
119    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Null())
120    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))
121
122
123def _datatype_sql(self, expression):
124    if expression.this == exp.DataType.Type.ARRAY:
125        return "ARRAY"
126    elif expression.this == exp.DataType.Type.MAP:
127        return "OBJECT"
128    return self.datatype_sql(expression)
129
130
131class Snowflake(Dialect):
132    null_ordering = "nulls_are_large"
133    time_format = "'yyyy-mm-dd hh24:mi:ss'"
134
135    time_mapping = {
136        "YYYY": "%Y",
137        "yyyy": "%Y",
138        "YY": "%y",
139        "yy": "%y",
140        "MMMM": "%B",
141        "mmmm": "%B",
142        "MON": "%b",
143        "mon": "%b",
144        "MM": "%m",
145        "mm": "%m",
146        "DD": "%d",
147        "dd": "%d",
148        "d": "%-d",
149        "DY": "%w",
150        "dy": "%w",
151        "HH24": "%H",
152        "hh24": "%H",
153        "HH12": "%I",
154        "hh12": "%I",
155        "MI": "%M",
156        "mi": "%M",
157        "SS": "%S",
158        "ss": "%S",
159        "FF": "%f",
160        "ff": "%f",
161        "FF6": "%f",
162        "ff6": "%f",
163    }
164
165    class Parser(parser.Parser):
166        FUNCTIONS = {
167            **parser.Parser.FUNCTIONS,
168            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
169            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
170            "DATE_TRUNC": lambda args: exp.DateTrunc(
171                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
172                this=seq_get(args, 1),
173            ),
174            "DIV0": _div0_to_if,
175            "IFF": exp.If.from_arg_list,
176            "TO_ARRAY": exp.Array.from_arg_list,
177            "TO_TIMESTAMP": _snowflake_to_timestamp,
178            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
179            "RLIKE": exp.RegexpLike.from_arg_list,
180            "DECODE": exp.Matches.from_arg_list,
181            "OBJECT_CONSTRUCT": parser.parse_var_map,
182            "ZEROIFNULL": _zeroifnull_to_if,
183        }
184
185        FUNCTION_PARSERS = {
186            **parser.Parser.FUNCTION_PARSERS,
187            "DATE_PART": _parse_date_part,
188        }
189        FUNCTION_PARSERS.pop("TRIM")
190
191        FUNC_TOKENS = {
192            *parser.Parser.FUNC_TOKENS,
193            TokenType.RLIKE,
194            TokenType.TABLE,
195        }
196
197        COLUMN_OPERATORS = {
198            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
199            TokenType.COLON: lambda self, this, path: self.expression(
200                exp.Bracket,
201                this=this,
202                expressions=[path],
203            ),
204        }
205
206        RANGE_PARSERS = {
207            **parser.Parser.RANGE_PARSERS,  # type: ignore
208            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
209                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
210            ),
211            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
212                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
213            ),
214        }
215
216        ALTER_PARSERS = {
217            **parser.Parser.ALTER_PARSERS,  # type: ignore
218            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
219            "SET": lambda self: self._parse_alter_table_set_tag(),
220        }
221
222        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
223            self._match_text_seq("TAG")
224            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
225            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)
226
227    class Tokenizer(tokens.Tokenizer):
228        QUOTES = ["'", "$$"]
229        STRING_ESCAPES = ["\\", "'"]
230
231        KEYWORDS = {
232            **tokens.Tokenizer.KEYWORDS,
233            "EXCLUDE": TokenType.EXCEPT,
234            "ILIKE ANY": TokenType.ILIKE_ANY,
235            "LIKE ANY": TokenType.LIKE_ANY,
236            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
237            "PUT": TokenType.COMMAND,
238            "RENAME": TokenType.REPLACE,
239            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
240            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
241            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
242            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
243            "MINUS": TokenType.EXCEPT,
244            "SAMPLE": TokenType.TABLE_SAMPLE,
245        }
246
247        SINGLE_TOKENS = {
248            **tokens.Tokenizer.SINGLE_TOKENS,
249            "$": TokenType.PARAMETER,
250        }
251
252    class Generator(generator.Generator):
253        PARAMETER_TOKEN = "$"
254
255        TRANSFORMS = {
256            **generator.Generator.TRANSFORMS,  # type: ignore
257            exp.Array: inline_array_sql,
258            exp.ArrayConcat: rename_func("ARRAY_CAT"),
259            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
260            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
261            exp.DateStrToDate: datestrtodate_sql,
262            exp.DataType: _datatype_sql,
263            exp.If: rename_func("IFF"),
264            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
265            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
266            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
267            exp.Matches: rename_func("DECODE"),
268            exp.StrPosition: lambda self, e: self.func(
269                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
270            ),
271            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
272            exp.TimeStrToTime: timestrtotime_sql,
273            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
274            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
275            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
276            exp.UnixToTime: _unix_to_time_sql,
277            exp.DayOfWeek: rename_func("DAYOFWEEK"),
278        }
279
280        TYPE_MAPPING = {
281            **generator.Generator.TYPE_MAPPING,  # type: ignore
282            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
283        }
284
285        STAR_MAPPING = {
286            "except": "EXCLUDE",
287            "replace": "RENAME",
288        }
289
290        PROPERTIES_LOCATION = {
291            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
292            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
293        }
294
295        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
296            return self.binary(expression, "ILIKE ANY")
297
298        def likeany_sql(self, expression: exp.LikeAny) -> str:
299            return self.binary(expression, "LIKE ANY")
300
301        def except_op(self, expression):
302            if not expression.args.get("distinct", False):
303                self.unsupported("EXCEPT with All is not supported in Snowflake")
304            return super().except_op(expression)
305
306        def intersect_op(self, expression):
307            if not expression.args.get("distinct", False):
308                self.unsupported("INTERSECT with All is not supported in Snowflake")
309            return super().intersect_op(expression)
310
311        def values_sql(self, expression: exp.Values) -> str:
312            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.
313
314            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
315            from adding quotes to the column by using the `identify` argument when generating the SQL.
316            """
317            alias = expression.args.get("alias")
318            if alias and alias.args.get("columns"):
319                expression = expression.transform(
320                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
321                    if isinstance(node, exp.Identifier)
322                    and isinstance(node.parent, exp.TableAlias)
323                    and node.arg_key == "columns"
324                    else node,
325                )
326                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
327            return super().values_sql(expression)
328
329        def settag_sql(self, expression: exp.SetTag) -> str:
330            action = "UNSET" if expression.args.get("unset") else "SET"
331            return f"{action} TAG {self.expressions(expression)}"
332
333        def select_sql(self, expression: exp.Select) -> str:
334            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
335            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
336            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
337            generating the SQL.
338
339            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
340            expression. This might not be true in a case where the same column name can be sourced from another table that can
341            properly quote but should be true in most cases.
342            """
343            values_expressions = expression.find_all(exp.Values)
344            values_identifiers = set(
345                flatten(
346                    v.args.get("alias", exp.Alias()).args.get("columns", [])
347                    for v in values_expressions
348                )
349            )
350            if values_identifiers:
351                expression = expression.transform(
352                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
353                    if isinstance(node, exp.Identifier) and node in values_identifiers
354                    else node,
355                )
356                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
357            return super().select_sql(expression)
358
359        def describe_sql(self, expression: exp.Describe) -> str:
360            # Default to table if kind is unknown
361            kind_value = expression.args.get("kind") or "TABLE"
362            kind = f" {kind_value}" if kind_value else ""
363            this = f" {self.sql(expression, 'this')}"
364            return f"DESCRIBE{kind}{this}"
365
366        def generatedasidentitycolumnconstraint_sql(
367            self, expression: exp.GeneratedAsIdentityColumnConstraint
368        ) -> str:
369            start = expression.args.get("start")
370            start = f" START {start}" if start else ""
371            increment = expression.args.get("increment")
372            increment = f" INCREMENT {increment}" if increment else ""
373            return f"AUTOINCREMENT{start}{increment}"
class Snowflake(sqlglot.dialects.dialect.Dialect):
132class Snowflake(Dialect):
133    null_ordering = "nulls_are_large"
134    time_format = "'yyyy-mm-dd hh24:mi:ss'"
135
136    time_mapping = {
137        "YYYY": "%Y",
138        "yyyy": "%Y",
139        "YY": "%y",
140        "yy": "%y",
141        "MMMM": "%B",
142        "mmmm": "%B",
143        "MON": "%b",
144        "mon": "%b",
145        "MM": "%m",
146        "mm": "%m",
147        "DD": "%d",
148        "dd": "%d",
149        "d": "%-d",
150        "DY": "%w",
151        "dy": "%w",
152        "HH24": "%H",
153        "hh24": "%H",
154        "HH12": "%I",
155        "hh12": "%I",
156        "MI": "%M",
157        "mi": "%M",
158        "SS": "%S",
159        "ss": "%S",
160        "FF": "%f",
161        "ff": "%f",
162        "FF6": "%f",
163        "ff6": "%f",
164    }
165
166    class Parser(parser.Parser):
167        FUNCTIONS = {
168            **parser.Parser.FUNCTIONS,
169            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
170            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
171            "DATE_TRUNC": lambda args: exp.DateTrunc(
172                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
173                this=seq_get(args, 1),
174            ),
175            "DIV0": _div0_to_if,
176            "IFF": exp.If.from_arg_list,
177            "TO_ARRAY": exp.Array.from_arg_list,
178            "TO_TIMESTAMP": _snowflake_to_timestamp,
179            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
180            "RLIKE": exp.RegexpLike.from_arg_list,
181            "DECODE": exp.Matches.from_arg_list,
182            "OBJECT_CONSTRUCT": parser.parse_var_map,
183            "ZEROIFNULL": _zeroifnull_to_if,
184        }
185
186        FUNCTION_PARSERS = {
187            **parser.Parser.FUNCTION_PARSERS,
188            "DATE_PART": _parse_date_part,
189        }
190        FUNCTION_PARSERS.pop("TRIM")
191
192        FUNC_TOKENS = {
193            *parser.Parser.FUNC_TOKENS,
194            TokenType.RLIKE,
195            TokenType.TABLE,
196        }
197
198        COLUMN_OPERATORS = {
199            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
200            TokenType.COLON: lambda self, this, path: self.expression(
201                exp.Bracket,
202                this=this,
203                expressions=[path],
204            ),
205        }
206
207        RANGE_PARSERS = {
208            **parser.Parser.RANGE_PARSERS,  # type: ignore
209            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
210                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
211            ),
212            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
213                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
214            ),
215        }
216
217        ALTER_PARSERS = {
218            **parser.Parser.ALTER_PARSERS,  # type: ignore
219            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
220            "SET": lambda self: self._parse_alter_table_set_tag(),
221        }
222
223        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
224            self._match_text_seq("TAG")
225            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
226            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)
227
228    class Tokenizer(tokens.Tokenizer):
229        QUOTES = ["'", "$$"]
230        STRING_ESCAPES = ["\\", "'"]
231
232        KEYWORDS = {
233            **tokens.Tokenizer.KEYWORDS,
234            "EXCLUDE": TokenType.EXCEPT,
235            "ILIKE ANY": TokenType.ILIKE_ANY,
236            "LIKE ANY": TokenType.LIKE_ANY,
237            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
238            "PUT": TokenType.COMMAND,
239            "RENAME": TokenType.REPLACE,
240            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
241            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
242            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
243            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
244            "MINUS": TokenType.EXCEPT,
245            "SAMPLE": TokenType.TABLE_SAMPLE,
246        }
247
248        SINGLE_TOKENS = {
249            **tokens.Tokenizer.SINGLE_TOKENS,
250            "$": TokenType.PARAMETER,
251        }
252
253    class Generator(generator.Generator):
254        PARAMETER_TOKEN = "$"
255
256        TRANSFORMS = {
257            **generator.Generator.TRANSFORMS,  # type: ignore
258            exp.Array: inline_array_sql,
259            exp.ArrayConcat: rename_func("ARRAY_CAT"),
260            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
261            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
262            exp.DateStrToDate: datestrtodate_sql,
263            exp.DataType: _datatype_sql,
264            exp.If: rename_func("IFF"),
265            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
266            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
267            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
268            exp.Matches: rename_func("DECODE"),
269            exp.StrPosition: lambda self, e: self.func(
270                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
271            ),
272            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
273            exp.TimeStrToTime: timestrtotime_sql,
274            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
275            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
276            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
277            exp.UnixToTime: _unix_to_time_sql,
278            exp.DayOfWeek: rename_func("DAYOFWEEK"),
279        }
280
281        TYPE_MAPPING = {
282            **generator.Generator.TYPE_MAPPING,  # type: ignore
283            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
284        }
285
286        STAR_MAPPING = {
287            "except": "EXCLUDE",
288            "replace": "RENAME",
289        }
290
291        PROPERTIES_LOCATION = {
292            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
293            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
294        }
295
296        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
297            return self.binary(expression, "ILIKE ANY")
298
299        def likeany_sql(self, expression: exp.LikeAny) -> str:
300            return self.binary(expression, "LIKE ANY")
301
302        def except_op(self, expression):
303            if not expression.args.get("distinct", False):
304                self.unsupported("EXCEPT with All is not supported in Snowflake")
305            return super().except_op(expression)
306
307        def intersect_op(self, expression):
308            if not expression.args.get("distinct", False):
309                self.unsupported("INTERSECT with All is not supported in Snowflake")
310            return super().intersect_op(expression)
311
312        def values_sql(self, expression: exp.Values) -> str:
313            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.
314
315            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
316            from adding quotes to the column by using the `identify` argument when generating the SQL.
317            """
318            alias = expression.args.get("alias")
319            if alias and alias.args.get("columns"):
320                expression = expression.transform(
321                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
322                    if isinstance(node, exp.Identifier)
323                    and isinstance(node.parent, exp.TableAlias)
324                    and node.arg_key == "columns"
325                    else node,
326                )
327                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
328            return super().values_sql(expression)
329
330        def settag_sql(self, expression: exp.SetTag) -> str:
331            action = "UNSET" if expression.args.get("unset") else "SET"
332            return f"{action} TAG {self.expressions(expression)}"
333
334        def select_sql(self, expression: exp.Select) -> str:
335            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
336            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
337            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
338            generating the SQL.
339
340            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
341            expression. This might not be true in a case where the same column name can be sourced from another table that can
342            properly quote but should be true in most cases.
343            """
344            values_expressions = expression.find_all(exp.Values)
345            values_identifiers = set(
346                flatten(
347                    v.args.get("alias", exp.Alias()).args.get("columns", [])
348                    for v in values_expressions
349                )
350            )
351            if values_identifiers:
352                expression = expression.transform(
353                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
354                    if isinstance(node, exp.Identifier) and node in values_identifiers
355                    else node,
356                )
357                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
358            return super().select_sql(expression)
359
360        def describe_sql(self, expression: exp.Describe) -> str:
361            # Default to table if kind is unknown
362            kind_value = expression.args.get("kind") or "TABLE"
363            kind = f" {kind_value}" if kind_value else ""
364            this = f" {self.sql(expression, 'this')}"
365            return f"DESCRIBE{kind}{this}"
366
367        def generatedasidentitycolumnconstraint_sql(
368            self, expression: exp.GeneratedAsIdentityColumnConstraint
369        ) -> str:
370            start = expression.args.get("start")
371            start = f" START {start}" if start else ""
372            increment = expression.args.get("increment")
373            increment = f" INCREMENT {increment}" if increment else ""
374            return f"AUTOINCREMENT{start}{increment}"
class Snowflake.Parser(sqlglot.parser.Parser):
166    class Parser(parser.Parser):
167        FUNCTIONS = {
168            **parser.Parser.FUNCTIONS,
169            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
170            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
171            "DATE_TRUNC": lambda args: exp.DateTrunc(
172                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
173                this=seq_get(args, 1),
174            ),
175            "DIV0": _div0_to_if,
176            "IFF": exp.If.from_arg_list,
177            "TO_ARRAY": exp.Array.from_arg_list,
178            "TO_TIMESTAMP": _snowflake_to_timestamp,
179            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
180            "RLIKE": exp.RegexpLike.from_arg_list,
181            "DECODE": exp.Matches.from_arg_list,
182            "OBJECT_CONSTRUCT": parser.parse_var_map,
183            "ZEROIFNULL": _zeroifnull_to_if,
184        }
185
186        FUNCTION_PARSERS = {
187            **parser.Parser.FUNCTION_PARSERS,
188            "DATE_PART": _parse_date_part,
189        }
190        FUNCTION_PARSERS.pop("TRIM")
191
192        FUNC_TOKENS = {
193            *parser.Parser.FUNC_TOKENS,
194            TokenType.RLIKE,
195            TokenType.TABLE,
196        }
197
198        COLUMN_OPERATORS = {
199            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
200            TokenType.COLON: lambda self, this, path: self.expression(
201                exp.Bracket,
202                this=this,
203                expressions=[path],
204            ),
205        }
206
207        RANGE_PARSERS = {
208            **parser.Parser.RANGE_PARSERS,  # type: ignore
209            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
210                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
211            ),
212            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
213                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
214            ),
215        }
216
217        ALTER_PARSERS = {
218            **parser.Parser.ALTER_PARSERS,  # type: ignore
219            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
220            "SET": lambda self: self._parse_alter_table_set_tag(),
221        }
222
223        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
224            self._match_text_seq("TAG")
225            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
226            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: the number of characters of context to capture from the query string when displaying an error message. Default: 50.
  • index_offset: Index offset for arrays, e.g. whether ARRAY[0] or ARRAY[1] refers to the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
class Snowflake.Tokenizer(sqlglot.tokens.Tokenizer):
228    class Tokenizer(tokens.Tokenizer):
229        QUOTES = ["'", "$$"]
230        STRING_ESCAPES = ["\\", "'"]
231
232        KEYWORDS = {
233            **tokens.Tokenizer.KEYWORDS,
234            "EXCLUDE": TokenType.EXCEPT,
235            "ILIKE ANY": TokenType.ILIKE_ANY,
236            "LIKE ANY": TokenType.LIKE_ANY,
237            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
238            "PUT": TokenType.COMMAND,
239            "RENAME": TokenType.REPLACE,
240            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
241            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
242            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
243            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
244            "MINUS": TokenType.EXCEPT,
245            "SAMPLE": TokenType.TABLE_SAMPLE,
246        }
247
248        SINGLE_TOKENS = {
249            **tokens.Tokenizer.SINGLE_TOKENS,
250            "$": TokenType.PARAMETER,
251        }
class Snowflake.Generator(sqlglot.generator.Generator):
253    class Generator(generator.Generator):
254        PARAMETER_TOKEN = "$"
255
256        TRANSFORMS = {
257            **generator.Generator.TRANSFORMS,  # type: ignore
258            exp.Array: inline_array_sql,
259            exp.ArrayConcat: rename_func("ARRAY_CAT"),
260            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
261            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
262            exp.DateStrToDate: datestrtodate_sql,
263            exp.DataType: _datatype_sql,
264            exp.If: rename_func("IFF"),
265            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
266            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
267            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
268            exp.Matches: rename_func("DECODE"),
269            exp.StrPosition: lambda self, e: self.func(
270                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
271            ),
272            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
273            exp.TimeStrToTime: timestrtotime_sql,
274            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
275            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
276            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
277            exp.UnixToTime: _unix_to_time_sql,
278            exp.DayOfWeek: rename_func("DAYOFWEEK"),
279        }
280
281        TYPE_MAPPING = {
282            **generator.Generator.TYPE_MAPPING,  # type: ignore
283            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
284        }
285
286        STAR_MAPPING = {
287            "except": "EXCLUDE",
288            "replace": "RENAME",
289        }
290
291        PROPERTIES_LOCATION = {
292            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
293            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
294        }
295
296        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
297            return self.binary(expression, "ILIKE ANY")
298
299        def likeany_sql(self, expression: exp.LikeAny) -> str:
300            return self.binary(expression, "LIKE ANY")
301
302        def except_op(self, expression):
303            if not expression.args.get("distinct", False):
304                self.unsupported("EXCEPT with All is not supported in Snowflake")
305            return super().except_op(expression)
306
307        def intersect_op(self, expression):
308            if not expression.args.get("distinct", False):
309                self.unsupported("INTERSECT with All is not supported in Snowflake")
310            return super().intersect_op(expression)
311
312        def values_sql(self, expression: exp.Values) -> str:
313            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.
314
315            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
316            from adding quotes to the column by using the `identify` argument when generating the SQL.
317            """
318            alias = expression.args.get("alias")
319            if alias and alias.args.get("columns"):
320                expression = expression.transform(
321                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
322                    if isinstance(node, exp.Identifier)
323                    and isinstance(node.parent, exp.TableAlias)
324                    and node.arg_key == "columns"
325                    else node,
326                )
327                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
328            return super().values_sql(expression)
329
330        def settag_sql(self, expression: exp.SetTag) -> str:
331            action = "UNSET" if expression.args.get("unset") else "SET"
332            return f"{action} TAG {self.expressions(expression)}"
333
334        def select_sql(self, expression: exp.Select) -> str:
335            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
336            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
337            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
338            generating the SQL.
339
340            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
341            expression. This might not be true in a case where the same column name can be sourced from another table that can
342            properly quote but should be true in most cases.
343            """
344            values_expressions = expression.find_all(exp.Values)
345            values_identifiers = set(
346                flatten(
347                    v.args.get("alias", exp.Alias()).args.get("columns", [])
348                    for v in values_expressions
349                )
350            )
351            if values_identifiers:
352                expression = expression.transform(
353                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
354                    if isinstance(node, exp.Identifier) and node in values_identifiers
355                    else node,
356                )
357                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
358            return super().select_sql(expression)
359
360        def describe_sql(self, expression: exp.Describe) -> str:
361            # Default to table if kind is unknown
362            kind_value = expression.args.get("kind") or "TABLE"
363            kind = f" {kind_value}" if kind_value else ""
364            this = f" {self.sql(expression, 'this')}"
365            return f"DESCRIBE{kind}{this}"
366
367        def generatedasidentitycolumnconstraint_sql(
368            self, expression: exp.GeneratedAsIdentityColumnConstraint
369        ) -> str:
370            start = expression.args.get("start")
371            start = f" START {start}" if start else ""
372            increment = expression.args.get("increment")
373            increment = f" INCREMENT {increment}" if increment else ""
374            return f"AUTOINCREMENT{start}{increment}"

Generator interprets the given syntax tree and produces a SQL string as an output.

Arguments:
  • time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value represents the target time format
  • time_trie (trie): a trie of the time_mapping keys
  • pretty (bool): if set to True the returned string will be formatted. Default: False.
  • quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
  • quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
  • identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
  • identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
  • identify (bool): if set to True all identifiers will be delimited by the corresponding character.
  • normalize (bool): if set to True all identifiers will be lower cased
  • string_escape (str): specifies a string escape character. Default: '.
  • identifier_escape (str): specifies an identifier escape character. Default: ".
  • pad (int): determines padding in a formatted string. Default: 2.
  • indent (int): determines the size of indentation in a formatted string. Default: 4.
  • unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
  • normalize_functions (str): normalize function names, "upper", "lower", or None. Default: "upper"
  • alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
  • unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
  • max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
def ilikeany_sql(self, expression: sqlglot.expressions.ILikeAny) -> str:
296        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
297            return self.binary(expression, "ILIKE ANY")
def likeany_sql(self, expression: sqlglot.expressions.LikeAny) -> str:
299        def likeany_sql(self, expression: exp.LikeAny) -> str:
300            return self.binary(expression, "LIKE ANY")
def except_op(self, expression):
302        def except_op(self, expression):
303            if not expression.args.get("distinct", False):
304                self.unsupported("EXCEPT with All is not supported in Snowflake")
305            return super().except_op(expression)
def intersect_op(self, expression):
307        def intersect_op(self, expression):
308            if not expression.args.get("distinct", False):
309                self.unsupported("INTERSECT with All is not supported in Snowflake")
310            return super().intersect_op(expression)
def values_sql(self, expression: sqlglot.expressions.Values) -> str:
312        def values_sql(self, expression: exp.Values) -> str:
313            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.
314
315            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
316            from adding quotes to the column by using the `identify` argument when generating the SQL.
317            """
318            alias = expression.args.get("alias")
319            if alias and alias.args.get("columns"):
320                expression = expression.transform(
321                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
322                    if isinstance(node, exp.Identifier)
323                    and isinstance(node.parent, exp.TableAlias)
324                    and node.arg_key == "columns"
325                    else node,
326                )
327                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
328            return super().values_sql(expression)

Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

We also want to make sure that after we find matches where we need to unquote a column that we prevent users from adding quotes to the column by using the identify argument when generating the SQL.

def settag_sql(self, expression: sqlglot.expressions.SetTag) -> str:
330        def settag_sql(self, expression: exp.SetTag) -> str:
331            action = "UNSET" if expression.args.get("unset") else "SET"
332            return f"{action} TAG {self.expressions(expression)}"
def select_sql(self, expression: sqlglot.expressions.Select) -> str:
334        def select_sql(self, expression: exp.Select) -> str:
335            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
336            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
337            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
338            generating the SQL.
339
340            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
341            expression. This might not be true in a case where the same column name can be sourced from another table that can
342            properly quote but should be true in most cases.
343            """
344            values_expressions = expression.find_all(exp.Values)
345            values_identifiers = set(
346                flatten(
347                    v.args.get("alias", exp.Alias()).args.get("columns", [])
348                    for v in values_expressions
349                )
350            )
351            if values_identifiers:
352                expression = expression.transform(
353                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
354                    if isinstance(node, exp.Identifier) and node in values_identifiers
355                    else node,
356                )
357                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
358            return super().select_sql(expression)

Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need to unquote a column that we prevent users from adding quotes to the column by using the identify argument when generating the SQL.

Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the expression. This might not be true in a case where the same column name can be sourced from another table that can properly quote but should be true in most cases.

def describe_sql(self, expression: sqlglot.expressions.Describe) -> str:
360        def describe_sql(self, expression: exp.Describe) -> str:
361            # Default to table if kind is unknown
362            kind_value = expression.args.get("kind") or "TABLE"
363            kind = f" {kind_value}" if kind_value else ""
364            this = f" {self.sql(expression, 'this')}"
365            return f"DESCRIBE{kind}{this}"
def generatedasidentitycolumnconstraint_sql( self, expression: sqlglot.expressions.GeneratedAsIdentityColumnConstraint) -> str:
367        def generatedasidentitycolumnconstraint_sql(
368            self, expression: exp.GeneratedAsIdentityColumnConstraint
369        ) -> str:
370            start = expression.args.get("start")
371            start = f" START {start}" if start else ""
372            increment = expression.args.get("increment")
373            increment = f" INCREMENT {increment}" if increment else ""
374            return f"AUTOINCREMENT{start}{increment}"
Inherited Members
sqlglot.generator.Generator
Generator
generate
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columndef_sql
columnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
create_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
afterjournalproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
lockingproperty_sql
withdataproperty_sql
insert_sql
intersect_sql
introducer_sql
pseudotype_sql
rowformatdelimitedproperty_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
var_sql
into_sql
from_sql
group_sql
having_sql
join_sql
lambda_sql
lateral_sql
limit_sql
offset_sql
lock_sql
literal_sql
loaddata_sql
null_sql
boolean_sql
order_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
schema_sql
star_sql
structkwarg_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
window_spec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
extract_sql
trim_sql
concat_sql
check_sql
foreignkey_sql
primarykey_sql
unique_sql
if_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
aliases_sql
attimezone_sql
add_sql
and_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
comment_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
altertable_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
intdiv_sql
dpipe_sql
div_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
is_sql
like_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql