# sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    """Map Snowflake's overloaded TO_TIMESTAMP onto the matching sqlglot expression."""
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Parse OBJECT_CONSTRUCT(...) into a StarMap (for `*`) or a Struct of key = value pairs."""
    expression = parser.parse_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _parse_datediff(args: t.List) -> exp.DateDiff:
    """DATEDIFF(unit, start, end) -> DateDiff; the unit is normalized via _map_date_part."""
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
    """Parse DATE_PART(<part>, <expr>), turning EPOCH_* parts into unix-time arithmetic."""
    this = self._parse_var() or self._parse_type()

    if not this:
        return None

    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()
    this = _map_date_part(this)
    name = this.name.upper()

    if name.startswith("EPOCH"):
        if name == "EPOCH_MILLISECOND":
            scale = 10**3
        elif name == "EPOCH_MICROSECOND":
            scale = 10**6
        elif name == "EPOCH_NANOSECOND":
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args: t.List) -> exp.If:
    """DIV0(a, b) -> IF(b = 0, 0, a / b)."""
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args: t.List) -> exp.If:
    """ZEROIFNULL(x) -> IF(x IS NULL, 0, x)."""
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args: t.List) -> exp.If:
    """NULLIFZERO(x) -> IF(x = 0, NULL, x)."""
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    """Snowflake's semi-structured types: ARRAY stays untyped, MAP becomes OBJECT."""
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Generate REGEXP_LIKE with a flag argument that always includes `i` (case-insensitive)."""
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _parse_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    """The 3-arg form has no generic equivalent, so only the 2-arg form maps to AtTimeZone."""
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _parse_regexp_replace(args: t.List) -> exp.RegexpReplace:
    """REGEXP_REPLACE defaults its replacement to the empty string when omitted."""
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Build a SHOW_PARSERS entry that forwards to Parser._parse_show_snowflake."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


# Snowflake date/time part aliases, normalized to their canonical names.
DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    """Normalize a date part alias via DATE_PART_MAPPING; unknown parts pass through."""
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    """DATE_TRUNC with the unit normalized to its canonical date part name."""
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _parse_colon_get_path(
    self: parser.Parser, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Parse Snowflake's `:` extraction operator chain into nested JSONExtract nodes."""
    while True:
        path = self._parse_bitwise()

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
        if isinstance(path, exp.Cast):
            target_type = path.to
            path = path.this
        else:
            target_type = None

        if isinstance(path, exp.Expression):
            path = exp.Literal.string(path.sql(dialect="snowflake"))

        # The extraction operator : is left-associative
        this = self.expression(
            exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
        )

        if target_type:
            this = exp.cast(this, target_type)

        if not self._match(TokenType.COLON):
            break

    return self._parse_range(this)


def _parse_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _parse_datediff,
            "DIV0": _div0_to_if,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "REGEXP_REPLACE": _parse_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _parse_datediff,
            "TIMESTAMPDIFF": _parse_datediff,
            "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts,
            "TO_TIMESTAMP": _parse_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: _parse_colon_get_path,
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            """Attach FLATTEN's implicit output column names to LATERAL FLATTEN aliases."""
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE"
                scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: rename_func("GET_PATH"),
            exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self,
            e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            """Fold the `milli` argument into `nano`, since Snowflake only accepts nanoseconds."""
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            # Single-argument LOG is the natural logarithm, which Snowflake spells LN
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as TABLE(FLATTEN(...)), aliasing FLATTEN's fixed output columns."""
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
327class Snowflake(Dialect): 328 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 329 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 330 NULL_ORDERING = "nulls_are_large" 331 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 332 SUPPORTS_USER_DEFINED_TYPES = False 333 SUPPORTS_SEMI_ANTI_JOIN = False 334 PREFER_CTE_ALIAS_COLUMN = True 335 TABLESAMPLE_SIZE_IS_PERCENT = True 336 337 TIME_MAPPING = { 338 "YYYY": "%Y", 339 "yyyy": "%Y", 340 "YY": "%y", 341 "yy": "%y", 342 "MMMM": "%B", 343 "mmmm": "%B", 344 "MON": "%b", 345 "mon": "%b", 346 "MM": "%m", 347 "mm": "%m", 348 "DD": "%d", 349 "dd": "%-d", 350 "DY": "%a", 351 "dy": "%w", 352 "HH24": "%H", 353 "hh24": "%H", 354 "HH12": "%I", 355 "hh12": "%I", 356 "MI": "%M", 357 "mi": "%M", 358 "SS": "%S", 359 "ss": "%S", 360 "FF": "%f", 361 "ff": "%f", 362 "FF6": "%f", 363 "ff6": "%f", 364 } 365 366 def quote_identifier(self, expression: E, identify: bool = True) -> E: 367 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 368 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 369 if ( 370 isinstance(expression, exp.Identifier) 371 and isinstance(expression.parent, exp.Table) 372 and expression.name.lower() == "dual" 373 ): 374 return expression # type: ignore 375 376 return super().quote_identifier(expression, identify=identify) 377 378 class Parser(parser.Parser): 379 IDENTIFY_PIVOT_STRINGS = True 380 381 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 382 383 FUNCTIONS = { 384 **parser.Parser.FUNCTIONS, 385 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 386 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 387 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 388 this=seq_get(args, 1), expression=seq_get(args, 0) 389 ), 390 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 391 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 392 start=seq_get(args, 0), 393 end=exp.Sub(this=seq_get(args, 1), 
expression=exp.Literal.number(1)), 394 step=seq_get(args, 2), 395 ), 396 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 397 "BITXOR": binary_from_function(exp.BitwiseXor), 398 "BIT_XOR": binary_from_function(exp.BitwiseXor), 399 "BOOLXOR": binary_from_function(exp.Xor), 400 "CONVERT_TIMEZONE": _parse_convert_timezone, 401 "DATE_TRUNC": _date_trunc_to_time, 402 "DATEADD": lambda args: exp.DateAdd( 403 this=seq_get(args, 2), 404 expression=seq_get(args, 1), 405 unit=_map_date_part(seq_get(args, 0)), 406 ), 407 "DATEDIFF": _parse_datediff, 408 "DIV0": _div0_to_if, 409 "FLATTEN": exp.Explode.from_arg_list, 410 "GET_PATH": lambda args, dialect: exp.JSONExtract( 411 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 412 ), 413 "IFF": exp.If.from_arg_list, 414 "LAST_DAY": lambda args: exp.LastDay( 415 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 416 ), 417 "LISTAGG": exp.GroupConcat.from_arg_list, 418 "NULLIFZERO": _nullifzero_to_if, 419 "OBJECT_CONSTRUCT": _parse_object_construct, 420 "REGEXP_REPLACE": _parse_regexp_replace, 421 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 422 "RLIKE": exp.RegexpLike.from_arg_list, 423 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 424 "TIMEDIFF": _parse_datediff, 425 "TIMESTAMPDIFF": _parse_datediff, 426 "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts, 427 "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts, 428 "TO_TIMESTAMP": _parse_to_timestamp, 429 "TO_VARCHAR": exp.ToChar.from_arg_list, 430 "ZEROIFNULL": _zeroifnull_to_if, 431 } 432 433 FUNCTION_PARSERS = { 434 **parser.Parser.FUNCTION_PARSERS, 435 "DATE_PART": _parse_date_part, 436 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 437 } 438 FUNCTION_PARSERS.pop("TRIM") 439 440 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 441 442 RANGE_PARSERS = { 443 **parser.Parser.RANGE_PARSERS, 444 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 445 
TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 446 TokenType.COLON: _parse_colon_get_path, 447 } 448 449 ALTER_PARSERS = { 450 **parser.Parser.ALTER_PARSERS, 451 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 452 "UNSET": lambda self: self.expression( 453 exp.Set, 454 tag=self._match_text_seq("TAG"), 455 expressions=self._parse_csv(self._parse_id_var), 456 unset=True, 457 ), 458 "SWAP": lambda self: self._parse_alter_table_swap(), 459 } 460 461 STATEMENT_PARSERS = { 462 **parser.Parser.STATEMENT_PARSERS, 463 TokenType.SHOW: lambda self: self._parse_show(), 464 } 465 466 PROPERTY_PARSERS = { 467 **parser.Parser.PROPERTY_PARSERS, 468 "LOCATION": lambda self: self._parse_location(), 469 } 470 471 SHOW_PARSERS = { 472 "SCHEMAS": _show_parser("SCHEMAS"), 473 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 474 "OBJECTS": _show_parser("OBJECTS"), 475 "TERSE OBJECTS": _show_parser("OBJECTS"), 476 "TABLES": _show_parser("TABLES"), 477 "TERSE TABLES": _show_parser("TABLES"), 478 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 479 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 480 "COLUMNS": _show_parser("COLUMNS"), 481 "USERS": _show_parser("USERS"), 482 "TERSE USERS": _show_parser("USERS"), 483 } 484 485 STAGED_FILE_SINGLE_TOKENS = { 486 TokenType.DOT, 487 TokenType.MOD, 488 TokenType.SLASH, 489 } 490 491 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 492 493 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 494 if is_map: 495 # Keys are strings in Snowflake's objects, see also: 496 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 497 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 498 return self._parse_slice(self._parse_string()) 499 500 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 501 502 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 503 lateral = super()._parse_lateral() 504 if 
not lateral: 505 return lateral 506 507 if isinstance(lateral.this, exp.Explode): 508 table_alias = lateral.args.get("alias") 509 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 510 if table_alias and not table_alias.args.get("columns"): 511 table_alias.set("columns", columns) 512 elif not table_alias: 513 exp.alias_(lateral, "_flattened", table=columns, copy=False) 514 515 return lateral 516 517 def _parse_at_before(self, table: exp.Table) -> exp.Table: 518 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 519 index = self._index 520 if self._match_texts(("AT", "BEFORE")): 521 this = self._prev.text.upper() 522 kind = ( 523 self._match(TokenType.L_PAREN) 524 and self._match_texts(self.HISTORICAL_DATA_KIND) 525 and self._prev.text.upper() 526 ) 527 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 528 529 if expression: 530 self._match_r_paren() 531 when = self.expression( 532 exp.HistoricalData, this=this, kind=kind, expression=expression 533 ) 534 table.set("when", when) 535 else: 536 self._retreat(index) 537 538 return table 539 540 def _parse_table_parts( 541 self, schema: bool = False, is_db_reference: bool = False 542 ) -> exp.Table: 543 # https://docs.snowflake.com/en/user-guide/querying-stage 544 if self._match(TokenType.STRING, advance=False): 545 table = self._parse_string() 546 elif self._match_text_seq("@", advance=False): 547 table = self._parse_location_path() 548 else: 549 table = None 550 551 if table: 552 file_format = None 553 pattern = None 554 555 self._match(TokenType.L_PAREN) 556 while self._curr and not self._match(TokenType.R_PAREN): 557 if self._match_text_seq("FILE_FORMAT", "=>"): 558 file_format = self._parse_string() or super()._parse_table_parts( 559 is_db_reference=is_db_reference 560 ) 561 elif self._match_text_seq("PATTERN", "=>"): 562 pattern = self._parse_string() 563 else: 564 break 565 566 self._match(TokenType.COMMA) 567 568 table = self.expression(exp.Table, this=table, 
format=file_format, pattern=pattern) 569 else: 570 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 571 572 return self._parse_at_before(table) 573 574 def _parse_id_var( 575 self, 576 any_token: bool = True, 577 tokens: t.Optional[t.Collection[TokenType]] = None, 578 ) -> t.Optional[exp.Expression]: 579 if self._match_text_seq("IDENTIFIER", "("): 580 identifier = ( 581 super()._parse_id_var(any_token=any_token, tokens=tokens) 582 or self._parse_string() 583 ) 584 self._match_r_paren() 585 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 586 587 return super()._parse_id_var(any_token=any_token, tokens=tokens) 588 589 def _parse_show_snowflake(self, this: str) -> exp.Show: 590 scope = None 591 scope_kind = None 592 593 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 594 # which is syntactically valid but has no effect on the output 595 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 596 597 history = self._match_text_seq("HISTORY") 598 599 like = self._parse_string() if self._match(TokenType.LIKE) else None 600 601 if self._match(TokenType.IN): 602 if self._match_text_seq("ACCOUNT"): 603 scope_kind = "ACCOUNT" 604 elif self._match_set(self.DB_CREATABLES): 605 scope_kind = self._prev.text.upper() 606 if self._curr: 607 scope = self._parse_table_parts() 608 elif self._curr: 609 scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE" 610 scope = self._parse_table_parts() 611 612 return self.expression( 613 exp.Show, 614 **{ 615 "terse": terse, 616 "this": this, 617 "history": history, 618 "like": like, 619 "scope": scope, 620 "scope_kind": scope_kind, 621 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 622 "limit": self._parse_limit(), 623 "from": self._parse_string() if self._match(TokenType.FROM) else None, 624 }, 625 ) 626 627 def _parse_alter_table_swap(self) -> exp.SwapTable: 628 self._match_text_seq("WITH") 629 return 
self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 630 631 def _parse_location(self) -> exp.LocationProperty: 632 self._match(TokenType.EQ) 633 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 634 635 def _parse_location_path(self) -> exp.Var: 636 parts = [self._advance_any(ignore_reserved=True)] 637 638 # We avoid consuming a comma token because external tables like @foo and @bar 639 # can be joined in a query with a comma separator. 640 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 641 parts.append(self._advance_any(ignore_reserved=True)) 642 643 return exp.var("".join(part.text for part in parts if part)) 644 645 class Tokenizer(tokens.Tokenizer): 646 STRING_ESCAPES = ["\\", "'"] 647 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 648 RAW_STRINGS = ["$$"] 649 COMMENTS = ["--", "//", ("/*", "*/")] 650 651 KEYWORDS = { 652 **tokens.Tokenizer.KEYWORDS, 653 "BYTEINT": TokenType.INT, 654 "CHAR VARYING": TokenType.VARCHAR, 655 "CHARACTER VARYING": TokenType.VARCHAR, 656 "EXCLUDE": TokenType.EXCEPT, 657 "ILIKE ANY": TokenType.ILIKE_ANY, 658 "LIKE ANY": TokenType.LIKE_ANY, 659 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 660 "MINUS": TokenType.EXCEPT, 661 "NCHAR VARYING": TokenType.VARCHAR, 662 "PUT": TokenType.COMMAND, 663 "REMOVE": TokenType.COMMAND, 664 "RENAME": TokenType.REPLACE, 665 "RM": TokenType.COMMAND, 666 "SAMPLE": TokenType.TABLE_SAMPLE, 667 "SQL_DOUBLE": TokenType.DOUBLE, 668 "SQL_VARCHAR": TokenType.VARCHAR, 669 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 670 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 671 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 672 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 673 "TOP": TokenType.TOP, 674 } 675 676 SINGLE_TOKENS = { 677 **tokens.Tokenizer.SINGLE_TOKENS, 678 "$": TokenType.PARAMETER, 679 } 680 681 VAR_SINGLE_TOKENS = {"$"} 682 683 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 684 685 class Generator(generator.Generator): 686 PARAMETER_TOKEN = "$" 687 
MATCHED_BY_SOURCE = False 688 SINGLE_STRING_INTERVAL = True 689 JOIN_HINTS = False 690 TABLE_HINTS = False 691 QUERY_HINTS = False 692 AGGREGATE_FILTER_SUPPORTED = False 693 SUPPORTS_TABLE_COPY = False 694 COLLATE_IS_FUNC = True 695 LIMIT_ONLY_LITERALS = True 696 JSON_KEY_VALUE_PAIR_SEP = "," 697 INSERT_OVERWRITE = " OVERWRITE INTO" 698 699 TRANSFORMS = { 700 **generator.Generator.TRANSFORMS, 701 exp.ArgMax: rename_func("MAX_BY"), 702 exp.ArgMin: rename_func("MIN_BY"), 703 exp.Array: inline_array_sql, 704 exp.ArrayConcat: rename_func("ARRAY_CAT"), 705 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 706 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 707 exp.AtTimeZone: lambda self, e: self.func( 708 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 709 ), 710 exp.BitwiseXor: rename_func("BITXOR"), 711 exp.DateAdd: date_delta_sql("DATEADD"), 712 exp.DateDiff: date_delta_sql("DATEDIFF"), 713 exp.DateStrToDate: datestrtodate_sql, 714 exp.DataType: _datatype_sql, 715 exp.DayOfMonth: rename_func("DAYOFMONTH"), 716 exp.DayOfWeek: rename_func("DAYOFWEEK"), 717 exp.DayOfYear: rename_func("DAYOFYEAR"), 718 exp.Explode: rename_func("FLATTEN"), 719 exp.Extract: rename_func("DATE_PART"), 720 exp.FromTimeZone: lambda self, e: self.func( 721 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 722 ), 723 exp.GenerateSeries: lambda self, e: self.func( 724 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 725 ), 726 exp.GroupConcat: rename_func("LISTAGG"), 727 exp.If: if_sql(name="IFF", false_value="NULL"), 728 exp.JSONExtract: rename_func("GET_PATH"), 729 exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"), 730 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 731 exp.JSONPathRoot: lambda *_: "", 732 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 733 exp.LogicalOr: rename_func("BOOLOR_AGG"), 734 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 735 
exp.Max: max_or_greatest, 736 exp.Min: min_or_least, 737 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 738 exp.PercentileCont: transforms.preprocess( 739 [transforms.add_within_group_for_percentiles] 740 ), 741 exp.PercentileDisc: transforms.preprocess( 742 [transforms.add_within_group_for_percentiles] 743 ), 744 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 745 exp.RegexpILike: _regexpilike_sql, 746 exp.Rand: rename_func("RANDOM"), 747 exp.Select: transforms.preprocess( 748 [ 749 transforms.eliminate_distinct_on, 750 transforms.explode_to_unnest(), 751 transforms.eliminate_semi_and_anti_joins, 752 ] 753 ), 754 exp.SHA: rename_func("SHA1"), 755 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 756 exp.StartsWith: rename_func("STARTSWITH"), 757 exp.StrPosition: lambda self, e: self.func( 758 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 759 ), 760 exp.StrToTime: lambda self, 761 e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", 762 exp.Struct: lambda self, e: self.func( 763 "OBJECT_CONSTRUCT", 764 *(arg for expression in e.expressions for arg in expression.flatten()), 765 ), 766 exp.Stuff: rename_func("INSERT"), 767 exp.TimestampDiff: lambda self, e: self.func( 768 "TIMESTAMPDIFF", e.unit, e.expression, e.this 769 ), 770 exp.TimestampTrunc: timestamptrunc_sql, 771 exp.TimeStrToTime: timestrtotime_sql, 772 exp.TimeToStr: lambda self, e: self.func( 773 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 774 ), 775 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 776 exp.ToArray: rename_func("TO_ARRAY"), 777 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 778 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 779 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 780 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 781 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 782 exp.VarMap: lambda self, e: var_map_sql(self, e, 
"OBJECT_CONSTRUCT"), 783 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 784 exp.Xor: rename_func("BOOLXOR"), 785 } 786 787 SUPPORTED_JSON_PATH_PARTS = { 788 exp.JSONPathKey, 789 exp.JSONPathRoot, 790 exp.JSONPathSubscript, 791 } 792 793 TYPE_MAPPING = { 794 **generator.Generator.TYPE_MAPPING, 795 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 796 } 797 798 STAR_MAPPING = { 799 "except": "EXCLUDE", 800 "replace": "RENAME", 801 } 802 803 PROPERTIES_LOCATION = { 804 **generator.Generator.PROPERTIES_LOCATION, 805 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 806 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 807 } 808 809 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 810 milli = expression.args.get("milli") 811 if milli is not None: 812 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 813 expression.set("nano", milli_to_nano) 814 815 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 816 817 def trycast_sql(self, expression: exp.TryCast) -> str: 818 value = expression.this 819 820 if value.type is None: 821 from sqlglot.optimizer.annotate_types import annotate_types 822 823 value = annotate_types(value) 824 825 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 826 return super().trycast_sql(expression) 827 828 # TRY_CAST only works for string values in Snowflake 829 return self.cast_sql(expression) 830 831 def log_sql(self, expression: exp.Log) -> str: 832 if not expression.expression: 833 return self.func("LN", expression.this) 834 835 return super().log_sql(expression) 836 837 def unnest_sql(self, expression: exp.Unnest) -> str: 838 unnest_alias = expression.args.get("alias") 839 offset = expression.args.get("offset") 840 841 columns = [ 842 exp.to_identifier("seq"), 843 exp.to_identifier("key"), 844 exp.to_identifier("path"), 845 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 846 seq_get(unnest_alias.columns if unnest_alias else [], 0) 847 or 
exp.to_identifier("value"), 848 exp.to_identifier("this"), 849 ] 850 851 if unnest_alias: 852 unnest_alias.set("columns", columns) 853 else: 854 unnest_alias = exp.TableAlias(this="_u", columns=columns) 855 856 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 857 alias = self.sql(unnest_alias) 858 alias = f" AS {alias}" if alias else "" 859 return f"{explode}{alias}" 860 861 def show_sql(self, expression: exp.Show) -> str: 862 terse = "TERSE " if expression.args.get("terse") else "" 863 history = " HISTORY" if expression.args.get("history") else "" 864 like = self.sql(expression, "like") 865 like = f" LIKE {like}" if like else "" 866 867 scope = self.sql(expression, "scope") 868 scope = f" {scope}" if scope else "" 869 870 scope_kind = self.sql(expression, "scope_kind") 871 if scope_kind: 872 scope_kind = f" IN {scope_kind}" 873 874 starts_with = self.sql(expression, "starts_with") 875 if starts_with: 876 starts_with = f" STARTS WITH {starts_with}" 877 878 limit = self.sql(expression, "limit") 879 880 from_ = self.sql(expression, "from") 881 if from_: 882 from_ = f" FROM {from_}" 883 884 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 885 886 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 887 # Other dialects don't support all of the following parameters, so we need to 888 # generate default values as necessary to ensure the transpilation is correct 889 group = expression.args.get("group") 890 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 891 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 892 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 893 894 return self.func( 895 "REGEXP_SUBSTR", 896 expression.this, 897 expression.expression, 898 position, 899 occurrence, 900 parameters, 901 group, 902 ) 903 904 def except_op(self, expression: exp.Except) -> 
str: 905 if not expression.args.get("distinct", False): 906 self.unsupported("EXCEPT with All is not supported in Snowflake") 907 return super().except_op(expression) 908 909 def intersect_op(self, expression: exp.Intersect) -> str: 910 if not expression.args.get("distinct", False): 911 self.unsupported("INTERSECT with All is not supported in Snowflake") 912 return super().intersect_op(expression) 913 914 def describe_sql(self, expression: exp.Describe) -> str: 915 # Default to table if kind is unknown 916 kind_value = expression.args.get("kind") or "TABLE" 917 kind = f" {kind_value}" if kind_value else "" 918 this = f" {self.sql(expression, 'this')}" 919 expressions = self.expressions(expression, flat=True) 920 expressions = f" {expressions}" if expressions else "" 921 return f"DESCRIBE{kind}{this}{expressions}" 922 923 def generatedasidentitycolumnconstraint_sql( 924 self, expression: exp.GeneratedAsIdentityColumnConstraint 925 ) -> str: 926 start = expression.args.get("start") 927 start = f" START {start}" if start else "" 928 increment = expression.args.get("increment") 929 increment = f" INCREMENT {increment}" if increment else "" 930 return f"AUTOINCREMENT{start}{increment}" 931 932 def swaptable_sql(self, expression: exp.SwapTable) -> str: 933 this = self.sql(expression, "this") 934 return f"SWAP WITH {this}" 935 936 def with_properties(self, properties: exp.Properties) -> str: 937 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 938 939 def cluster_sql(self, expression: exp.Cluster) -> str: 940 return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
Specifies the strategy according to which identifiers should be normalized.
Indicates the default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Determines whether or not user-defined data types are supported.
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Determines whether or not a size in the table sample clause represents percentage.
Associates this dialect's time formats with their equivalent Python `strftime` formats.
366 def quote_identifier(self, expression: E, identify: bool = True) -> E: 367 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 368 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 369 if ( 370 isinstance(expression, exp.Identifier) 371 and isinstance(expression.parent, exp.Table) 372 and expression.name.lower() == "dual" 373 ): 374 return expression # type: ignore 375 376 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
- identify: If set to `False`, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
378 class Parser(parser.Parser): 379 IDENTIFY_PIVOT_STRINGS = True 380 381 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 382 383 FUNCTIONS = { 384 **parser.Parser.FUNCTIONS, 385 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 386 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 387 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 388 this=seq_get(args, 1), expression=seq_get(args, 0) 389 ), 390 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 391 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 392 start=seq_get(args, 0), 393 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 394 step=seq_get(args, 2), 395 ), 396 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 397 "BITXOR": binary_from_function(exp.BitwiseXor), 398 "BIT_XOR": binary_from_function(exp.BitwiseXor), 399 "BOOLXOR": binary_from_function(exp.Xor), 400 "CONVERT_TIMEZONE": _parse_convert_timezone, 401 "DATE_TRUNC": _date_trunc_to_time, 402 "DATEADD": lambda args: exp.DateAdd( 403 this=seq_get(args, 2), 404 expression=seq_get(args, 1), 405 unit=_map_date_part(seq_get(args, 0)), 406 ), 407 "DATEDIFF": _parse_datediff, 408 "DIV0": _div0_to_if, 409 "FLATTEN": exp.Explode.from_arg_list, 410 "GET_PATH": lambda args, dialect: exp.JSONExtract( 411 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 412 ), 413 "IFF": exp.If.from_arg_list, 414 "LAST_DAY": lambda args: exp.LastDay( 415 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 416 ), 417 "LISTAGG": exp.GroupConcat.from_arg_list, 418 "NULLIFZERO": _nullifzero_to_if, 419 "OBJECT_CONSTRUCT": _parse_object_construct, 420 "REGEXP_REPLACE": _parse_regexp_replace, 421 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 422 "RLIKE": exp.RegexpLike.from_arg_list, 423 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 424 "TIMEDIFF": _parse_datediff, 425 "TIMESTAMPDIFF": _parse_datediff, 426 "TIMESTAMPFROMPARTS": 
_parse_timestamp_from_parts, 427 "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts, 428 "TO_TIMESTAMP": _parse_to_timestamp, 429 "TO_VARCHAR": exp.ToChar.from_arg_list, 430 "ZEROIFNULL": _zeroifnull_to_if, 431 } 432 433 FUNCTION_PARSERS = { 434 **parser.Parser.FUNCTION_PARSERS, 435 "DATE_PART": _parse_date_part, 436 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 437 } 438 FUNCTION_PARSERS.pop("TRIM") 439 440 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 441 442 RANGE_PARSERS = { 443 **parser.Parser.RANGE_PARSERS, 444 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 445 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 446 TokenType.COLON: _parse_colon_get_path, 447 } 448 449 ALTER_PARSERS = { 450 **parser.Parser.ALTER_PARSERS, 451 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 452 "UNSET": lambda self: self.expression( 453 exp.Set, 454 tag=self._match_text_seq("TAG"), 455 expressions=self._parse_csv(self._parse_id_var), 456 unset=True, 457 ), 458 "SWAP": lambda self: self._parse_alter_table_swap(), 459 } 460 461 STATEMENT_PARSERS = { 462 **parser.Parser.STATEMENT_PARSERS, 463 TokenType.SHOW: lambda self: self._parse_show(), 464 } 465 466 PROPERTY_PARSERS = { 467 **parser.Parser.PROPERTY_PARSERS, 468 "LOCATION": lambda self: self._parse_location(), 469 } 470 471 SHOW_PARSERS = { 472 "SCHEMAS": _show_parser("SCHEMAS"), 473 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 474 "OBJECTS": _show_parser("OBJECTS"), 475 "TERSE OBJECTS": _show_parser("OBJECTS"), 476 "TABLES": _show_parser("TABLES"), 477 "TERSE TABLES": _show_parser("TABLES"), 478 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 479 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 480 "COLUMNS": _show_parser("COLUMNS"), 481 "USERS": _show_parser("USERS"), 482 "TERSE USERS": _show_parser("USERS"), 483 } 484 485 STAGED_FILE_SINGLE_TOKENS = { 486 TokenType.DOT, 487 TokenType.MOD, 488 TokenType.SLASH, 489 } 490 491 FLATTEN_COLUMNS = 
["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 492 493 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 494 if is_map: 495 # Keys are strings in Snowflake's objects, see also: 496 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 497 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 498 return self._parse_slice(self._parse_string()) 499 500 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 501 502 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 503 lateral = super()._parse_lateral() 504 if not lateral: 505 return lateral 506 507 if isinstance(lateral.this, exp.Explode): 508 table_alias = lateral.args.get("alias") 509 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 510 if table_alias and not table_alias.args.get("columns"): 511 table_alias.set("columns", columns) 512 elif not table_alias: 513 exp.alias_(lateral, "_flattened", table=columns, copy=False) 514 515 return lateral 516 517 def _parse_at_before(self, table: exp.Table) -> exp.Table: 518 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 519 index = self._index 520 if self._match_texts(("AT", "BEFORE")): 521 this = self._prev.text.upper() 522 kind = ( 523 self._match(TokenType.L_PAREN) 524 and self._match_texts(self.HISTORICAL_DATA_KIND) 525 and self._prev.text.upper() 526 ) 527 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 528 529 if expression: 530 self._match_r_paren() 531 when = self.expression( 532 exp.HistoricalData, this=this, kind=kind, expression=expression 533 ) 534 table.set("when", when) 535 else: 536 self._retreat(index) 537 538 return table 539 540 def _parse_table_parts( 541 self, schema: bool = False, is_db_reference: bool = False 542 ) -> exp.Table: 543 # https://docs.snowflake.com/en/user-guide/querying-stage 544 if self._match(TokenType.STRING, advance=False): 545 table = self._parse_string() 546 elif 
self._match_text_seq("@", advance=False): 547 table = self._parse_location_path() 548 else: 549 table = None 550 551 if table: 552 file_format = None 553 pattern = None 554 555 self._match(TokenType.L_PAREN) 556 while self._curr and not self._match(TokenType.R_PAREN): 557 if self._match_text_seq("FILE_FORMAT", "=>"): 558 file_format = self._parse_string() or super()._parse_table_parts( 559 is_db_reference=is_db_reference 560 ) 561 elif self._match_text_seq("PATTERN", "=>"): 562 pattern = self._parse_string() 563 else: 564 break 565 566 self._match(TokenType.COMMA) 567 568 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 569 else: 570 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 571 572 return self._parse_at_before(table) 573 574 def _parse_id_var( 575 self, 576 any_token: bool = True, 577 tokens: t.Optional[t.Collection[TokenType]] = None, 578 ) -> t.Optional[exp.Expression]: 579 if self._match_text_seq("IDENTIFIER", "("): 580 identifier = ( 581 super()._parse_id_var(any_token=any_token, tokens=tokens) 582 or self._parse_string() 583 ) 584 self._match_r_paren() 585 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 586 587 return super()._parse_id_var(any_token=any_token, tokens=tokens) 588 589 def _parse_show_snowflake(self, this: str) -> exp.Show: 590 scope = None 591 scope_kind = None 592 593 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 594 # which is syntactically valid but has no effect on the output 595 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 596 597 history = self._match_text_seq("HISTORY") 598 599 like = self._parse_string() if self._match(TokenType.LIKE) else None 600 601 if self._match(TokenType.IN): 602 if self._match_text_seq("ACCOUNT"): 603 scope_kind = "ACCOUNT" 604 elif self._match_set(self.DB_CREATABLES): 605 scope_kind = self._prev.text.upper() 606 if self._curr: 607 scope = self._parse_table_parts() 
608 elif self._curr: 609 scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE" 610 scope = self._parse_table_parts() 611 612 return self.expression( 613 exp.Show, 614 **{ 615 "terse": terse, 616 "this": this, 617 "history": history, 618 "like": like, 619 "scope": scope, 620 "scope_kind": scope_kind, 621 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 622 "limit": self._parse_limit(), 623 "from": self._parse_string() if self._match(TokenType.FROM) else None, 624 }, 625 ) 626 627 def _parse_alter_table_swap(self) -> exp.SwapTable: 628 self._match_text_seq("WITH") 629 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 630 631 def _parse_location(self) -> exp.LocationProperty: 632 self._match(TokenType.EQ) 633 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 634 635 def _parse_location_path(self) -> exp.Var: 636 parts = [self._advance_any(ignore_reserved=True)] 637 638 # We avoid consuming a comma token because external tables like @foo and @bar 639 # can be joined in a query with a comma separator. 640 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 641 parts.append(self._advance_any(ignore_reserved=True)) 642 643 return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    """Snowflake-specific tokenizer configuration."""

    # Both backslash and a doubled quote escape inside string literals.
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    # $$ ... $$ delimits raw (dollar-quoted) strings.
    RAW_STRINGS = ["$$"]
    # Snowflake additionally supports // single-line comments.
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        # EXCLUDE / RENAME are Snowflake's SELECT * modifiers, mapped onto
        # the generic EXCEPT / REPLACE star tokens.
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        # MINUS is Snowflake's synonym for EXCEPT.
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        # Stage file operations are passed through as opaque commands.
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RENAME": TokenType.REPLACE,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
        "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TIMESTAMPNTZ": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        # $ introduces session parameters / positional stage references.
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    # SHOW is parsed as a real statement by this dialect, so it must not
    # fall through to generic opaque-command handling.
    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
class Generator(generator.Generator):
    """Generates Snowflake SQL from a sqlglot syntax tree."""

    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        # NOTE: arguments are emitted in (expression, this) order here,
        # i.e. reversed relative to the ArrayContains node's own order.
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DataType: _datatype_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        # ARRAY_GENERATE_RANGE's upper bound is exclusive, hence the + 1.
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: rename_func("GET_PATH"),
        exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self,
        e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
        # Flatten EQ pairs produced for Struct into alternating key/value args.
        exp.Struct: lambda self, e: self.func(
            "OBJECT_CONSTRUCT",
            *(arg for expression in e.expressions for arg in expression.flatten()),
        ),
        exp.Stuff: rename_func("INSERT"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
    }

    # SELECT * EXCEPT/REPLACE render as Snowflake's EXCLUDE/RENAME.
    STAR_MAPPING = {
        "except": "EXCLUDE",
        "replace": "RENAME",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """Render TIMESTAMP_FROM_PARTS, folding any `milli` arg into `nano`."""
        milli = expression.args.get("milli")
        if milli is not None:
            # TIMESTAMP_FROM_PARTS has no milliseconds slot here; scale to ns.
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        """Emit TRY_CAST only for string operands, otherwise plain CAST."""
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            # Infer the operand's type so the string-only check can run.
            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        """Render single-argument LOG as the natural logarithm LN."""
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Transpile UNNEST into Snowflake's TABLE(FLATTEN(INPUT => ...))."""
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        # FLATTEN exposes fixed output columns (seq, key, path, index,
        # value, this); map the requested offset/value aliases onto them.
        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        """Render a Snowflake SHOW command with its optional modifiers."""
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        """Render REGEXP_SUBSTR, backfilling defaults for optional args."""
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        """Warn on EXCEPT ALL, which Snowflake does not support."""
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        """Warn on INTERSECT ALL, which Snowflake does not support."""
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        """Render DESCRIBE, defaulting the object kind to TABLE."""
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        """Render identity columns as Snowflake's AUTOINCREMENT clause."""
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        """Render ALTER TABLE ... SWAP WITH <table>."""
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def with_properties(self, properties: exp.Properties) -> str:
        """Render table properties unwrapped, separated by spaces."""
        return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

    def cluster_sql(self, expression: exp.Cluster) -> str:
        """Render Snowflake's parenthesized CLUSTER BY clause."""
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): never quote, except where the dialect makes quoting mandatory. True or 'always': always quote. 'safe': only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS, folding any `milli` arg into `nano`."""
    milli_arg = expression.args.get("milli")
    if milli_arg is not None:
        # TIMESTAMP_FROM_PARTS has no milliseconds slot; scale to nanoseconds.
        expression.set("nano", milli_arg.pop() * exp.Literal.number(1000000))

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Emit TRY_CAST only for string operands, otherwise plain CAST."""
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        # Infer the operand's type so the string-only check below can run.
        value = annotate_types(value)

    if not value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    return super().trycast_sql(expression)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Transpile UNNEST into Snowflake's TABLE(FLATTEN(INPUT => ...))."""
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    # FLATTEN exposes fixed output columns (seq, key, path, index, value,
    # this); map the requested offset/value aliases onto those slots.
    # NOTE: offset.pop() must run before reading the alias columns, since
    # it detaches the offset expression from its parent.
    if isinstance(offset, exp.Expression):
        index_column = offset.pop()
    else:
        index_column = exp.to_identifier("index")

    value_column = seq_get(unnest_alias.columns if unnest_alias else [], 0) or exp.to_identifier(
        "value"
    )

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        index_column,
        value_column,
        exp.to_identifier("this"),
    ]

    if not unnest_alias:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)
    else:
        unnest_alias.set("columns", columns)

    flattened = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
    alias_sql = self.sql(unnest_alias)
    return f"{flattened} AS {alias_sql}" if alias_sql else flattened
def show_sql(self, expression: exp.Show) -> str:
    """Render a Snowflake SHOW command with its optional modifiers."""
    args = expression.args
    terse = "TERSE " if args.get("terse") else ""
    history = " HISTORY" if args.get("history") else ""

    like = self.sql(expression, "like")
    if like:
        like = f" LIKE {like}"

    scope = self.sql(expression, "scope")
    if scope:
        scope = f" {scope}"

    scope_kind = self.sql(expression, "scope_kind")
    scope_kind = f" IN {scope_kind}" if scope_kind else ""

    starts_with = self.sql(expression, "starts_with")
    starts_with = f" STARTS WITH {starts_with}" if starts_with else ""

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    from_ = f" FROM {from_}" if from_ else ""

    return (
        f"SHOW {terse}{expression.name}{history}{like}"
        f"{scope_kind}{scope}{starts_with}{limit}{from_}"
    )
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    """Render REGEXP_SUBSTR, backfilling defaults for optional arguments.

    Other dialects don't support all of REGEXP_SUBSTR's parameters, so
    default values are generated as necessary to keep the transpiled
    arguments in the correct positions.
    """
    group = expression.args.get("group")
    parameters = expression.args.get("parameters")
    occurrence = expression.args.get("occurrence")
    position = expression.args.get("position")

    # Backfill right-to-left: a later argument forces defaults for every
    # earlier optional one so positional arguments stay aligned.
    if group and not parameters:
        parameters = exp.Literal.string("c")
    if parameters and not occurrence:
        occurrence = exp.Literal.number(1)
    if occurrence and not position:
        position = exp.Literal.number(1)

    return self.func(
        "REGEXP_SUBSTR",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
def describe_sql(self, expression: exp.Describe) -> str:
    """Render DESCRIBE, defaulting the object kind to TABLE.

    The `or "TABLE"` fallback makes ``kind_value`` always truthy, so the
    original ``if kind_value else ""`` branch was unreachable and has been
    removed.
    """
    # Default to table if kind is unknown
    kind = f" {expression.args.get('kind') or 'TABLE'}"
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render an identity column as Snowflake's AUTOINCREMENT clause."""
    pieces = ["AUTOINCREMENT"]

    start = expression.args.get("start")
    if start:
        pieces.append(f"START {start}")

    increment = expression.args.get("increment")
    if increment:
        pieces.append(f"INCREMENT {increment}")

    return " ".join(pieces)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql