Edit on GitHub

sqlglot.dialects.snowflake

View Source

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens
  6from sqlglot.dialects.dialect import (
  7    Dialect,
  8    datestrtodate_sql,
  9    format_time_lambda,
 10    inline_array_sql,
 11    min_or_least,
 12    rename_func,
 13    timestrtotime_sql,
 14    ts_or_ds_to_date_sql,
 15    var_map_sql,
 16)
 17from sqlglot.expressions import Literal
 18from sqlglot.helper import flatten, seq_get
 19from sqlglot.parser import binary_range_parser
 20from sqlglot.tokens import TokenType
 21
 22
 23def _check_int(s):
 24    if s[0] in ("-", "+"):
 25        return s[1:].isdigit()
 26    return s.isdigit()
 27
 28
 29# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
 30def _snowflake_to_timestamp(args):
 31    if len(args) == 2:
 32        first_arg, second_arg = args
 33        if second_arg.is_string:
 34            # case: <string_expr> [ , <format> ]
 35            return format_time_lambda(exp.StrToTime, "snowflake")(args)
 36
 37        # case: <numeric_expr> [ , <scale> ]
 38        if second_arg.name not in ["0", "3", "9"]:
 39            raise ValueError(
 40                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
 41            )
 42
 43        if second_arg.name == "0":
 44            timescale = exp.UnixToTime.SECONDS
 45        elif second_arg.name == "3":
 46            timescale = exp.UnixToTime.MILLIS
 47        elif second_arg.name == "9":
 48            timescale = exp.UnixToTime.MICROS
 49
 50        return exp.UnixToTime(this=first_arg, scale=timescale)
 51
 52    first_arg = seq_get(args, 0)
 53    if not isinstance(first_arg, Literal):
 54        # case: <variant_expr>
 55        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 56
 57    if first_arg.is_string:
 58        if _check_int(first_arg.this):
 59            # case: <integer>
 60            return exp.UnixToTime.from_arg_list(args)
 61
 62        # case: <date_expr>
 63        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)
 64
 65    # case: <numeric_expr>
 66    return exp.UnixToTime.from_arg_list(args)
 67
 68
 69def _unix_to_time_sql(self, expression):
 70    scale = expression.args.get("scale")
 71    timestamp = self.sql(expression, "this")
 72    if scale in [None, exp.UnixToTime.SECONDS]:
 73        return f"TO_TIMESTAMP({timestamp})"
 74    if scale == exp.UnixToTime.MILLIS:
 75        return f"TO_TIMESTAMP({timestamp}, 3)"
 76    if scale == exp.UnixToTime.MICROS:
 77        return f"TO_TIMESTAMP({timestamp}, 9)"
 78
 79    raise ValueError("Improper scale for timestamp")
 80
 81
 82# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
 83# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
 84def _parse_date_part(self):
 85    this = self._parse_var() or self._parse_type()
 86    self._match(TokenType.COMMA)
 87    expression = self._parse_bitwise()
 88
 89    name = this.name.upper()
 90    if name.startswith("EPOCH"):
 91        if name.startswith("EPOCH_MILLISECOND"):
 92            scale = 10**3
 93        elif name.startswith("EPOCH_MICROSECOND"):
 94            scale = 10**6
 95        elif name.startswith("EPOCH_NANOSECOND"):
 96            scale = 10**9
 97        else:
 98            scale = None
 99
100        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
101        to_unix = self.expression(exp.TimeToUnix, this=ts)
102
103        if scale:
104            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
105
106        return to_unix
107
108    return self.expression(exp.Extract, this=this, expression=expression)
109
110
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args):
    """Translate ``DIV0(a, b)`` into ``IF(b = 0, 0, a / b)``."""
    dividend = seq_get(args, 0)
    divisor = seq_get(args, 1)
    return exp.If(
        this=exp.EQ(this=divisor, expression=exp.Literal.number(0)),
        true=exp.Literal.number(0),
        false=exp.FloatDiv(this=dividend, expression=divisor),
    )
117
118
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args):
    """Translate ``ZEROIFNULL(x)`` into ``IF(x IS NULL, 0, x)``."""
    operand = seq_get(args, 0)
    is_null = exp.Is(this=operand, expression=exp.Null())
    return exp.If(this=is_null, true=exp.Literal.number(0), false=operand)
123
124
# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args):
    """Translate ``NULLIFZERO(x)`` into ``IF(x = 0, NULL, x)``."""
    operand = seq_get(args, 0)
    is_zero = exp.EQ(this=operand, expression=exp.Literal.number(0))
    return exp.If(this=is_zero, true=exp.Null(), false=operand)
129
130
def _datatype_sql(self, expression):
    """Render a data type, emitting bare ``ARRAY`` / ``OBJECT`` for array and map types.

    Every other type is delegated to the generator's regular ``datatype_sql``.
    """
    kind = expression.this
    if kind == exp.DataType.Type.ARRAY:
        return "ARRAY"
    if kind == exp.DataType.Type.MAP:
        return "OBJECT"
    return self.datatype_sql(expression)
137
138
class Snowflake(Dialect):
    """Snowflake dialect: tokenizer, parser and generator customizations."""

    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    # Snowflake format elements (upper and lower case) -> strftime-style directives.
    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%d",
        "d": "%-d",
        "DY": "%w",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        # Function name -> builder that receives the already-parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            # DATEADD / DATEDIFF take the unit first and the base value last.
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
                this=seq_get(args, 1),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "DECODE": exp.Matches.from_arg_list,
            "OBJECT_CONSTRUCT": parser.parse_var_map,
            "ZEROIFNULL": _zeroifnull_to_if,
            "NULLIFZERO": _nullifzero_to_if,
        }

        # Functions that need to drive the parser themselves.
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited special-case TRIM parser for this dialect.
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        # `<expr>:<path>` is parsed into a Bracket node.
        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,  # type: ignore
            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,  # type: ignore
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        INTEGER_DIVISION = False

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the tag list of ``ALTER TABLE ... SET TAG`` / ``UNSET TAG``.

            Args:
                unset: when True each item is parsed as an identifier; otherwise
                    each item is parsed with ``_parse_conjunction``.

            Returns:
                A ``SetTag`` expression holding the comma-separated items.
            """
            self._match_text_seq("TAG")
            # Named so it does not shadow the imported `parser` module.
            parse_item = t.cast(
                t.Callable, self._parse_id_var if unset else self._parse_conjunction
            )
            return self.expression(exp.SetTag, expressions=self._parse_csv(parse_item), unset=unset)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "MINUS": TokenType.EXCEPT,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        INTEGER_DIVISION = False
        MATCHED_BY_SOURCE = False

        # Expression type -> function(generator, expression) producing SQL text.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.If: rename_func("IFF"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Matches: rename_func("DECODE"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Min: min_or_least,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
            """Render ``<this> ILIKE ANY <expression>``."""
            return self.binary(expression, "ILIKE ANY")

        def likeany_sql(self, expression: exp.LikeAny) -> str:
            """Render ``<this> LIKE ANY <expression>``."""
            return self.binary(expression, "LIKE ANY")

        def except_op(self, expression):
            """Render EXCEPT, flagging the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression):
            """Render INTERSECT, flagging the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def values_sql(self, expression: exp.Values) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
            from adding quotes to the column by using the `identify` argument when generating the SQL.
            """
            alias = expression.args.get("alias")
            if alias and alias.args.get("columns"):
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier)
                    and isinstance(node.parent, exp.TableAlias)
                    and node.arg_key == "columns"
                    else node,
                )
                # Bind the parent implementation here: zero-argument super() cannot be
                # used inside the lambda, and super(self.__class__, self) would recurse
                # forever if this generator were subclassed.
                parent_values_sql = super().values_sql
                return self.no_identify(lambda: parent_values_sql(expression))
            return super().values_sql(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            """Render ``SET TAG ...`` or ``UNSET TAG ...`` depending on the ``unset`` arg."""
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def select_sql(self, expression: exp.Select) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
            generating the SQL.

            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
            expression. This might not be true in a case where the same column name can be sourced from another table that can
            properly quote but should be true in most cases.
            """
            values_identifiers = set(
                flatten(
                    # `or []` also covers an alias whose "columns" arg is present but None,
                    # which would otherwise put None in the set and make it truthy.
                    (v.args.get("alias") or exp.Alias()).args.get("columns") or []
                    for v in expression.find_all(exp.Values)
                )
            )
            if values_identifiers:
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier) and node in values_identifiers
                    else node,
                )
                # See values_sql: bind the parent method outside the lambda so that
                # subclasses of this generator do not recurse.
                parent_select_sql = super().select_sql
                return self.no_identify(lambda: parent_select_sql(expression))
            return super().select_sql(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render ``DESCRIBE <kind> <this>``, defaulting the kind to TABLE."""
            # Default to table if kind is unknown
            kind = expression.args.get("kind") or "TABLE"
            return f"DESCRIBE {kind} {self.sql(expression, 'this')}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity column constraint as ``AUTOINCREMENT [START n] [INCREMENT n]``."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

class Snowflake(sqlglot.dialects.dialect.Dialect): View Source

class Snowflake(Dialect):
    """Snowflake dialect: tokenizer, parser and generator customizations."""

    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    # Snowflake format elements (upper and lower case) -> strftime-style directives.
    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%d",
        "d": "%-d",
        "DY": "%w",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        # Function name -> builder that receives the already-parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            # DATEADD / DATEDIFF take the unit first and the base value last.
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=seq_get(args, 0),
            ),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
                this=seq_get(args, 1),
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "DECODE": exp.Matches.from_arg_list,
            "OBJECT_CONSTRUCT": parser.parse_var_map,
            "ZEROIFNULL": _zeroifnull_to_if,
            "NULLIFZERO": _nullifzero_to_if,
        }

        # Functions that need to drive the parser themselves.
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Drop the inherited special-case TRIM parser for this dialect.
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        # `<expr>:<path>` is parsed into a Bracket node.
        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,  # type: ignore
            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,  # type: ignore
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        INTEGER_DIVISION = False

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse the tag list of ``ALTER TABLE ... SET TAG`` / ``UNSET TAG``.

            Args:
                unset: when True each item is parsed as an identifier; otherwise
                    each item is parsed with ``_parse_conjunction``.

            Returns:
                A ``SetTag`` expression holding the comma-separated items.
            """
            self._match_text_seq("TAG")
            # Named so it does not shadow the imported `parser` module.
            parse_item = t.cast(
                t.Callable, self._parse_id_var if unset else self._parse_conjunction
            )
            return self.expression(exp.SetTag, expressions=self._parse_csv(parse_item), unset=unset)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "MINUS": TokenType.EXCEPT,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        INTEGER_DIVISION = False
        MATCHED_BY_SOURCE = False

        # Expression type -> function(generator, expression) producing SQL text.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.If: rename_func("IFF"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Matches: rename_func("DECODE"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Min: min_or_least,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
            """Render ``<this> ILIKE ANY <expression>``."""
            return self.binary(expression, "ILIKE ANY")

        def likeany_sql(self, expression: exp.LikeAny) -> str:
            """Render ``<this> LIKE ANY <expression>``."""
            return self.binary(expression, "LIKE ANY")

        def except_op(self, expression):
            """Render EXCEPT, flagging the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression):
            """Render INTERSECT, flagging the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def values_sql(self, expression: exp.Values) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
            from adding quotes to the column by using the `identify` argument when generating the SQL.
            """
            alias = expression.args.get("alias")
            if alias and alias.args.get("columns"):
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier)
                    and isinstance(node.parent, exp.TableAlias)
                    and node.arg_key == "columns"
                    else node,
                )
                # Bind the parent implementation here: zero-argument super() cannot be
                # used inside the lambda, and super(self.__class__, self) would recurse
                # forever if this generator were subclassed.
                parent_values_sql = super().values_sql
                return self.no_identify(lambda: parent_values_sql(expression))
            return super().values_sql(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            """Render ``SET TAG ...`` or ``UNSET TAG ...`` depending on the ``unset`` arg."""
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def select_sql(self, expression: exp.Select) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
            generating the SQL.

            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
            expression. This might not be true in a case where the same column name can be sourced from another table that can
            properly quote but should be true in most cases.
            """
            values_identifiers = set(
                flatten(
                    # `or []` also covers an alias whose "columns" arg is present but None,
                    # which would otherwise put None in the set and make it truthy.
                    (v.args.get("alias") or exp.Alias()).args.get("columns") or []
                    for v in expression.find_all(exp.Values)
                )
            )
            if values_identifiers:
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier) and node in values_identifiers
                    else node,
                )
                # See values_sql: bind the parent method outside the lambda so that
                # subclasses of this generator do not recurse.
                parent_select_sql = super().select_sql
                return self.no_identify(lambda: parent_select_sql(expression))
            return super().select_sql(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            """Render ``DESCRIBE <kind> <this>``, defaulting the kind to TABLE."""
            # Default to table if kind is unknown
            kind = expression.args.get("kind") or "TABLE"
            return f"DESCRIBE {kind} {self.sql(expression, 'this')}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            """Render an identity column constraint as ``AUTOINCREMENT [START n] [INCREMENT n]``."""
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

Inherited Members

sqlglot.dialects.dialect.Dialect: get_or_raise; format_time; parse; parse_into; generate; transpile; parser; generator

class Snowflake.Parser(sqlglot.parser.Parser): View Source

174    class Parser(parser.Parser):
175        FUNCTIONS = {
176            **parser.Parser.FUNCTIONS,
177            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
178            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
179            "DATEADD": lambda args: exp.DateAdd(
180                this=seq_get(args, 2),
181                expression=seq_get(args, 1),
182                unit=seq_get(args, 0),
183            ),
184            "DATEDIFF": lambda args: exp.DateDiff(
185                this=seq_get(args, 2),
186                expression=seq_get(args, 1),
187                unit=seq_get(args, 0),
188            ),
189            "DATE_TRUNC": lambda args: exp.DateTrunc(
190                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
191                this=seq_get(args, 1),
192            ),
193            "DIV0": _div0_to_if,
194            "IFF": exp.If.from_arg_list,
195            "TO_ARRAY": exp.Array.from_arg_list,
196            "TO_TIMESTAMP": _snowflake_to_timestamp,
197            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
198            "RLIKE": exp.RegexpLike.from_arg_list,
199            "DECODE": exp.Matches.from_arg_list,
200            "OBJECT_CONSTRUCT": parser.parse_var_map,
201            "ZEROIFNULL": _zeroifnull_to_if,
202            "NULLIFZERO": _nullifzero_to_if,
203        }
204
205        FUNCTION_PARSERS = {
206            **parser.Parser.FUNCTION_PARSERS,
207            "DATE_PART": _parse_date_part,
208        }
209        FUNCTION_PARSERS.pop("TRIM")
210
211        FUNC_TOKENS = {
212            *parser.Parser.FUNC_TOKENS,
213            TokenType.RLIKE,
214            TokenType.TABLE,
215        }
216
217        COLUMN_OPERATORS = {
218            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
219            TokenType.COLON: lambda self, this, path: self.expression(
220                exp.Bracket,
221                this=this,
222                expressions=[path],
223            ),
224        }
225
226        RANGE_PARSERS = {
227            **parser.Parser.RANGE_PARSERS,  # type: ignore
228            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
229            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
230        }
231
232        ALTER_PARSERS = {
233            **parser.Parser.ALTER_PARSERS,  # type: ignore
234            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
235            "SET": lambda self: self._parse_alter_table_set_tag(),
236        }
237
238        INTEGER_DIVISION = False
239
240        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
241            self._match_text_seq("TAG")
242            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
243            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:

error_level: the desired error level. Default: ErrorLevel.RAISE
error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
alias_post_tablesample: If the table alias comes after tablesample. Default: False
max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"

Inherited Members

sqlglot.parser.Parser: Parser; reset; parse; parse_into; check_errors; raise_error; expression; validate_expression

class Snowflake.Tokenizer(sqlglot.tokens.Tokenizer): View Source

245    class Tokenizer(tokens.Tokenizer):
246        QUOTES = ["'", "$$"]
247        STRING_ESCAPES = ["\\", "'"]
248
249        KEYWORDS = {
250            **tokens.Tokenizer.KEYWORDS,
251            "EXCLUDE": TokenType.EXCEPT,
252            "ILIKE ANY": TokenType.ILIKE_ANY,
253            "LIKE ANY": TokenType.LIKE_ANY,
254            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
255            "PUT": TokenType.COMMAND,
256            "RENAME": TokenType.REPLACE,
257            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
258            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
259            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
260            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
261            "MINUS": TokenType.EXCEPT,
262            "SAMPLE": TokenType.TABLE_SAMPLE,
263        }
264
265        SINGLE_TOKENS = {
266            **tokens.Tokenizer.SINGLE_TOKENS,
267            "$": TokenType.PARAMETER,
268        }

Inherited Members

sqlglot.tokens.Tokenizer: reset; tokenize