sqlglot.dialects.snowflake
"""Snowflake dialect for sqlglot: tokenizer, parser and generator overrides."""
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    date_trunc_to_time,
    datestrtodate_sql,
    format_time_lambda,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_to_date_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import seq_get
from sqlglot.parser import binary_range_parser
from sqlglot.tokens import TokenType


def _check_int(s: str) -> bool:
    """Return True if `s` is an integer literal with an optional leading sign."""
    if s[0] in ("-", "+"):
        return s[1:].isdigit()
    return s.isdigit()


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _snowflake_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime]:
    """Build the expression for Snowflake's overloaded TO_TIMESTAMP function.

    Snowflake dispatches on the argument types: a string plus a format is a
    string-to-time parse, a number plus a scale is a unix epoch conversion, and
    a single argument is disambiguated by inspecting its (simplified) literal.
    """
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)

        # case: <numeric_expr> [ , <scale> ]
        # Snowflake only documents scales 0 (seconds), 3 (millis) and 9 (nanos,
        # mapped below to the closest supported timescale).
        if second_arg.name not in ["0", "3", "9"]:
            raise ValueError(
                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
            )

        if second_arg.name == "0":
            timescale = exp.UnixToTime.SECONDS
        elif second_arg.name == "3":
            timescale = exp.UnixToTime.MILLIS
        elif second_arg.name == "9":
            timescale = exp.UnixToTime.MICROS

        return exp.UnixToTime(this=first_arg, scale=timescale)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    if first_arg.is_string:
        if _check_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Parse OBJECT_CONSTRUCT arguments into a StarMap or a Struct of key = value pairs."""
    expression = parser.parse_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _unix_to_time_sql(self: generator.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime as TO_TIMESTAMP with Snowflake's numeric scale argument."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in [None, exp.UnixToTime.SECONDS]:
        return f"TO_TIMESTAMP({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TO_TIMESTAMP({timestamp}, 3)"
    if scale == exp.UnixToTime.MICROS:
        return f"TO_TIMESTAMP({timestamp}, 9)"

    raise ValueError("Improper scale for timestamp")


# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: parser.Parser) -> t.Optional[exp.Expression]:
    """Parse DATE_PART(<part>, <expr>), lowering EPOCH* parts to a scaled TimeToUnix."""
    this = self._parse_var() or self._parse_type()

    if not this:
        return None

    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()

    name = this.name.upper()
    if name.startswith("EPOCH"):
        # EPOCH / EPOCH_SECOND have no multiplier; sub-second parts scale the
        # epoch seconds by the matching power of ten.
        if name.startswith("EPOCH_MILLISECOND"):
            scale = 10**3
        elif name.startswith("EPOCH_MICROSECOND"):
            scale = 10**6
        elif name.startswith("EPOCH_NANOSECOND"):
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args: t.List) -> exp.Expression:
    """Rewrite DIV0(a, b) as IF(b = 0, 0, a / b)."""
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args: t.List) -> exp.Expression:
    """Rewrite ZEROIFNULL(x) as IF(x IS NULL, 0, x)."""
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args: t.List) -> exp.Expression:
    """Rewrite NULLIFZERO(x) as IF(x = 0, NULL, x)."""
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: generator.Generator, expression: exp.DataType) -> str:
    """Render ARRAY/MAP as Snowflake's untyped ARRAY/OBJECT; defer otherwise."""
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _parse_convert_timezone(args: t.List) -> exp.Expression:
    """Map 2-arg CONVERT_TIMEZONE to AtTimeZone; keep the 3-arg form opaque."""
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


class Snowflake(Dialect):
    """Snowflake SQL dialect definition."""

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = True
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"

    # Snowflake format tokens -> strftime-style tokens.
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": date_trunc_to_time,
            # Snowflake's DATEADD/DATEDIFF take (unit, amount, expr); sqlglot's
            # canonical form is (expr, amount, unit), hence the reversed seq_get.
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Snowflake's TRIM is a regular function call, not the SQL-standard
        # TRIM(x FROM y) syntax handled by the base parser.
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            # col:path -> bracket access into semi-structured data.
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket, this=this, expressions=[path]
            ),
        }

        TIMESTAMPS = parser.Parser.TIMESTAMPS.copy() - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse ALTER TABLE ... SET/UNSET TAG; UNSET takes bare identifiers."""
            self._match_text_seq("TAG")
            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateDiff: lambda self, e: self.func(
                "DATEDIFF", e.text("unit"), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Extract: rename_func("DATE_PART"),
            exp.If: rename_func("IFF"),
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            # NOTE(review): kind_value is always truthy after the `or "TABLE"`
            # fallback, so the conditional below can never yield "".
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            return f"DESCRIBE{kind}{this}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            # Snowflake spells identity columns as AUTOINCREMENT [START n] [INCREMENT n].
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Snowflake(Dialect):
    """Snowflake SQL dialect: identifier casing, NULL ordering, time formats,
    and nested Parser/Tokenizer/Generator overrides."""

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = True
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"

    # Snowflake format tokens -> strftime-style tokens.
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": date_trunc_to_time,
            # Snowflake argument order (unit, amount, expr) is reversed into
            # sqlglot's canonical (expr, amount, unit).
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DIV0": _div0_to_if,
            "IFF": exp.If.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TO_ARRAY": exp.Array.from_arg_list,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        # Snowflake TRIM is a plain function, not TRIM(x FROM y) syntax.
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            # col:path -> bracket access into semi-structured data.
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket, this=this, expressions=[path]
            ),
        }

        TIMESTAMPS = parser.Parser.TIMESTAMPS.copy() - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
            "SET": lambda self: self._parse_alter_table_set_tag(),
        }

        def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
            """Parse ALTER TABLE ... SET/UNSET TAG; UNSET takes bare identifiers."""
            self._match_text_seq("TAG")
            parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
            return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
            exp.DateDiff: lambda self, e: self.func(
                "DATEDIFF", e.text("unit"), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.Extract: rename_func("DATE_PART"),
            exp.If: rename_func("IFF"),
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def settag_sql(self, expression: exp.SetTag) -> str:
            action = "UNSET" if expression.args.get("unset") else "SET"
            return f"{action} TAG {self.expressions(expression)}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            # NOTE(review): kind_value is always truthy after the `or "TABLE"`
            # fallback, so the conditional below can never yield "".
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            return f"DESCRIBE{kind}{this}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            # Snowflake spells identity columns as AUTOINCREMENT [START n] [INCREMENT n].
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Parser(parser.Parser):
    """Snowflake-specific parser overrides (function tables and ALTER ... TAG)."""

    IDENTIFY_PIVOT_STRINGS = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
        "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
        "CONVERT_TIMEZONE": _parse_convert_timezone,
        "DATE_TRUNC": date_trunc_to_time,
        # Snowflake argument order (unit, amount, expr) is reversed into
        # sqlglot's canonical (expr, amount, unit).
        "DATEADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DIV0": _div0_to_if,
        "IFF": exp.If.from_arg_list,
        "NULLIFZERO": _nullifzero_to_if,
        "OBJECT_CONSTRUCT": _parse_object_construct,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TO_ARRAY": exp.Array.from_arg_list,
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "TO_TIMESTAMP": _snowflake_to_timestamp,
        "ZEROIFNULL": _zeroifnull_to_if,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": _parse_date_part,
    }
    # Snowflake TRIM is a plain function, not TRIM(x FROM y) syntax.
    FUNCTION_PARSERS.pop("TRIM")

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.RLIKE,
        TokenType.TABLE,
    }

    COLUMN_OPERATORS = {
        **parser.Parser.COLUMN_OPERATORS,
        # col:path -> bracket access into semi-structured data.
        TokenType.COLON: lambda self, this, path: self.expression(
            exp.Bracket, this=this, expressions=[path]
        ),
    }

    TIMESTAMPS = parser.Parser.TIMESTAMPS.copy() - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self._parse_alter_table_set_tag(unset=True),
        "SET": lambda self: self._parse_alter_table_set_tag(),
    }

    def _parse_alter_table_set_tag(self, unset: bool = False) -> exp.Expression:
        """Parse ALTER TABLE ... SET/UNSET TAG; UNSET takes bare identifiers."""
        self._match_text_seq("TAG")
        parser = t.cast(t.Callable, self._parse_id_var if unset else self._parse_conjunction)
        return self.expression(exp.SetTag, expressions=self._parse_csv(parser), unset=unset)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Snowflake-specific tokenizer: $$-quoted strings, // comments, $ parameters."""

    QUOTES = ["'", "$$"]
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "RENAME": TokenType.REPLACE,
        "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
        "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TIMESTAMPNTZ": TokenType.TIMESTAMP,
        "SAMPLE": TokenType.TABLE_SAMPLE,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}
Inherited Members
class Generator(generator.Generator):
    """Snowflake-specific SQL generator overrides."""

    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.DateAdd: lambda self, e: self.func("DATEADD", e.text("unit"), e.expression, e.this),
        exp.DateDiff: lambda self, e: self.func(
            "DATEDIFF", e.text("unit"), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DataType: _datatype_sql,
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.Extract: rename_func("DATE_PART"),
        exp.If: rename_func("IFF"),
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
        exp.Struct: lambda self, e: self.func(
            "OBJECT_CONSTRUCT",
            *(arg for expression in e.expressions for arg in expression.flatten()),
        ),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
        ),
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
    }

    STAR_MAPPING = {
        "except": "EXCLUDE",
        "replace": "RENAME",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def settag_sql(self, expression: exp.SetTag) -> str:
        action = "UNSET" if expression.args.get("unset") else "SET"
        return f"{action} TAG {self.expressions(expression)}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        # NOTE(review): kind_value is always truthy after the `or "TABLE"`
        # fallback, so the conditional below can never yield "".
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        return f"DESCRIBE{kind}{this}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        # Snowflake spells identity columns as AUTOINCREMENT [START n] [INCREMENT n].
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def generatedasidentitycolumnconstraint_sql(self, expression: sqlglot.expressions.GeneratedAsIdentityColumnConstraint) -> str:
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render an identity column constraint as Snowflake's
    AUTOINCREMENT [START n] [INCREMENT n] syntax, omitting absent clauses."""
    parts = ["AUTOINCREMENT"]
    for keyword, arg_name in (("START", "start"), ("INCREMENT", "increment")):
        value = expression.args.get(arg_name)
        if value:
            parts.append(f"{keyword} {value}")
    return " ".join(parts)
@classmethod
def can_identify(cls, text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify == "safe":
        return not cls.case_sensitive(text)
    # "always"/True force quoting; anything else means never quote.
    return identify is True or identify == "always"
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql