# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 
62 63 64def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 65 arg = seq_get(args, 0) 66 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 67 68 69def build_lower(args: t.List) -> exp.Lower | exp.Hex: 70 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 73 74 75def build_upper(args: t.List) -> exp.Upper | exp.Hex: 76 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 79 80 81def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 82 def _builder(args: t.List, dialect: Dialect) -> E: 83 expression = expr_type( 84 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 85 ) 86 if len(args) > 2 and expr_type is exp.JSONExtract: 87 expression.set("expressions", args[2:]) 88 89 return expression 90 91 return _builder 92 93 94def build_mod(args: t.List) -> exp.Mod: 95 this = seq_get(args, 0) 96 expression = seq_get(args, 1) 97 98 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 99 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 100 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 101 102 return exp.Mod(this=this, expression=expression) 103 104 105class _Parser(type): 106 def __new__(cls, clsname, bases, attrs): 107 klass = super().__new__(cls, clsname, bases, attrs) 108 109 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 110 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 111 112 return klass 113 114 115class Parser(metaclass=_Parser): 116 """ 117 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 
118 119 Args: 120 error_level: The desired error level. 121 Default: ErrorLevel.IMMEDIATE 122 error_message_context: The amount of context to capture from a query string when displaying 123 the error message (in number of characters). 124 Default: 100 125 max_errors: Maximum number of error messages to include in a raised ParseError. 126 This is only relevant if error_level is ErrorLevel.RAISE. 127 Default: 3 128 """ 129 130 FUNCTIONS: t.Dict[str, t.Callable] = { 131 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 132 "CONCAT": lambda args, dialect: exp.Concat( 133 expressions=args, 134 safe=not dialect.STRICT_STRING_CONCAT, 135 coalesce=dialect.CONCAT_COALESCE, 136 ), 137 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 138 expressions=args, 139 safe=not dialect.STRICT_STRING_CONCAT, 140 coalesce=dialect.CONCAT_COALESCE, 141 ), 142 "DATE_TO_DATE_STR": lambda args: exp.Cast( 143 this=seq_get(args, 0), 144 to=exp.DataType(this=exp.DataType.Type.TEXT), 145 ), 146 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 147 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 148 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 149 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 150 "LIKE": build_like, 151 "LOG": build_logarithm, 152 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 153 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 154 "MOD": build_mod, 155 "TIME_TO_TIME_STR": lambda args: exp.Cast( 156 this=seq_get(args, 0), 157 to=exp.DataType(this=exp.DataType.Type.TEXT), 158 ), 159 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 160 this=exp.Cast( 161 this=seq_get(args, 0), 162 to=exp.DataType(this=exp.DataType.Type.TEXT), 163 ), 164 start=exp.Literal.number(1), 165 length=exp.Literal.number(10), 166 ), 167 "VAR_MAP": build_var_map, 168 "LOWER": build_lower, 
169 "UPPER": build_upper, 170 "HEX": build_hex, 171 "TO_HEX": build_hex, 172 } 173 174 NO_PAREN_FUNCTIONS = { 175 TokenType.CURRENT_DATE: exp.CurrentDate, 176 TokenType.CURRENT_DATETIME: exp.CurrentDate, 177 TokenType.CURRENT_TIME: exp.CurrentTime, 178 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 179 TokenType.CURRENT_USER: exp.CurrentUser, 180 } 181 182 STRUCT_TYPE_TOKENS = { 183 TokenType.NESTED, 184 TokenType.OBJECT, 185 TokenType.STRUCT, 186 } 187 188 NESTED_TYPE_TOKENS = { 189 TokenType.ARRAY, 190 TokenType.LOWCARDINALITY, 191 TokenType.MAP, 192 TokenType.NULLABLE, 193 *STRUCT_TYPE_TOKENS, 194 } 195 196 ENUM_TYPE_TOKENS = { 197 TokenType.ENUM, 198 TokenType.ENUM8, 199 TokenType.ENUM16, 200 } 201 202 AGGREGATE_TYPE_TOKENS = { 203 TokenType.AGGREGATEFUNCTION, 204 TokenType.SIMPLEAGGREGATEFUNCTION, 205 } 206 207 TYPE_TOKENS = { 208 TokenType.BIT, 209 TokenType.BOOLEAN, 210 TokenType.TINYINT, 211 TokenType.UTINYINT, 212 TokenType.SMALLINT, 213 TokenType.USMALLINT, 214 TokenType.INT, 215 TokenType.UINT, 216 TokenType.BIGINT, 217 TokenType.UBIGINT, 218 TokenType.INT128, 219 TokenType.UINT128, 220 TokenType.INT256, 221 TokenType.UINT256, 222 TokenType.MEDIUMINT, 223 TokenType.UMEDIUMINT, 224 TokenType.FIXEDSTRING, 225 TokenType.FLOAT, 226 TokenType.DOUBLE, 227 TokenType.CHAR, 228 TokenType.NCHAR, 229 TokenType.VARCHAR, 230 TokenType.NVARCHAR, 231 TokenType.BPCHAR, 232 TokenType.TEXT, 233 TokenType.MEDIUMTEXT, 234 TokenType.LONGTEXT, 235 TokenType.MEDIUMBLOB, 236 TokenType.LONGBLOB, 237 TokenType.BINARY, 238 TokenType.VARBINARY, 239 TokenType.JSON, 240 TokenType.JSONB, 241 TokenType.INTERVAL, 242 TokenType.TINYBLOB, 243 TokenType.TINYTEXT, 244 TokenType.TIME, 245 TokenType.TIMETZ, 246 TokenType.TIMESTAMP, 247 TokenType.TIMESTAMP_S, 248 TokenType.TIMESTAMP_MS, 249 TokenType.TIMESTAMP_NS, 250 TokenType.TIMESTAMPTZ, 251 TokenType.TIMESTAMPLTZ, 252 TokenType.TIMESTAMPNTZ, 253 TokenType.DATETIME, 254 TokenType.DATETIME64, 255 TokenType.DATE, 256 TokenType.DATE32, 257 
TokenType.INT4RANGE, 258 TokenType.INT4MULTIRANGE, 259 TokenType.INT8RANGE, 260 TokenType.INT8MULTIRANGE, 261 TokenType.NUMRANGE, 262 TokenType.NUMMULTIRANGE, 263 TokenType.TSRANGE, 264 TokenType.TSMULTIRANGE, 265 TokenType.TSTZRANGE, 266 TokenType.TSTZMULTIRANGE, 267 TokenType.DATERANGE, 268 TokenType.DATEMULTIRANGE, 269 TokenType.DECIMAL, 270 TokenType.UDECIMAL, 271 TokenType.BIGDECIMAL, 272 TokenType.UUID, 273 TokenType.GEOGRAPHY, 274 TokenType.GEOMETRY, 275 TokenType.HLLSKETCH, 276 TokenType.HSTORE, 277 TokenType.PSEUDO_TYPE, 278 TokenType.SUPER, 279 TokenType.SERIAL, 280 TokenType.SMALLSERIAL, 281 TokenType.BIGSERIAL, 282 TokenType.XML, 283 TokenType.YEAR, 284 TokenType.UNIQUEIDENTIFIER, 285 TokenType.USERDEFINED, 286 TokenType.MONEY, 287 TokenType.SMALLMONEY, 288 TokenType.ROWVERSION, 289 TokenType.IMAGE, 290 TokenType.VARIANT, 291 TokenType.OBJECT, 292 TokenType.OBJECT_IDENTIFIER, 293 TokenType.INET, 294 TokenType.IPADDRESS, 295 TokenType.IPPREFIX, 296 TokenType.IPV4, 297 TokenType.IPV6, 298 TokenType.UNKNOWN, 299 TokenType.NULL, 300 TokenType.NAME, 301 TokenType.TDIGEST, 302 *ENUM_TYPE_TOKENS, 303 *NESTED_TYPE_TOKENS, 304 *AGGREGATE_TYPE_TOKENS, 305 } 306 307 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 308 TokenType.BIGINT: TokenType.UBIGINT, 309 TokenType.INT: TokenType.UINT, 310 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 311 TokenType.SMALLINT: TokenType.USMALLINT, 312 TokenType.TINYINT: TokenType.UTINYINT, 313 TokenType.DECIMAL: TokenType.UDECIMAL, 314 } 315 316 SUBQUERY_PREDICATES = { 317 TokenType.ANY: exp.Any, 318 TokenType.ALL: exp.All, 319 TokenType.EXISTS: exp.Exists, 320 TokenType.SOME: exp.Any, 321 } 322 323 RESERVED_TOKENS = { 324 *Tokenizer.SINGLE_TOKENS.values(), 325 TokenType.SELECT, 326 } - {TokenType.IDENTIFIER} 327 328 DB_CREATABLES = { 329 TokenType.DATABASE, 330 TokenType.DICTIONARY, 331 TokenType.MODEL, 332 TokenType.SCHEMA, 333 TokenType.SEQUENCE, 334 TokenType.STORAGE_INTEGRATION, 335 TokenType.TABLE, 336 TokenType.TAG, 337 TokenType.VIEW, 338 
} 339 340 CREATABLES = { 341 TokenType.COLUMN, 342 TokenType.CONSTRAINT, 343 TokenType.FOREIGN_KEY, 344 TokenType.FUNCTION, 345 TokenType.INDEX, 346 TokenType.PROCEDURE, 347 *DB_CREATABLES, 348 } 349 350 # Tokens that can represent identifiers 351 ID_VAR_TOKENS = { 352 TokenType.VAR, 353 TokenType.ANTI, 354 TokenType.APPLY, 355 TokenType.ASC, 356 TokenType.ASOF, 357 TokenType.AUTO_INCREMENT, 358 TokenType.BEGIN, 359 TokenType.BPCHAR, 360 TokenType.CACHE, 361 TokenType.CASE, 362 TokenType.COLLATE, 363 TokenType.COMMAND, 364 TokenType.COMMENT, 365 TokenType.COMMIT, 366 TokenType.CONSTRAINT, 367 TokenType.COPY, 368 TokenType.DEFAULT, 369 TokenType.DELETE, 370 TokenType.DESC, 371 TokenType.DESCRIBE, 372 TokenType.DICTIONARY, 373 TokenType.DIV, 374 TokenType.END, 375 TokenType.EXECUTE, 376 TokenType.ESCAPE, 377 TokenType.FALSE, 378 TokenType.FIRST, 379 TokenType.FILTER, 380 TokenType.FINAL, 381 TokenType.FORMAT, 382 TokenType.FULL, 383 TokenType.IDENTIFIER, 384 TokenType.IS, 385 TokenType.ISNULL, 386 TokenType.INTERVAL, 387 TokenType.KEEP, 388 TokenType.KILL, 389 TokenType.LEFT, 390 TokenType.LOAD, 391 TokenType.MERGE, 392 TokenType.NATURAL, 393 TokenType.NEXT, 394 TokenType.OFFSET, 395 TokenType.OPERATOR, 396 TokenType.ORDINALITY, 397 TokenType.OVERLAPS, 398 TokenType.OVERWRITE, 399 TokenType.PARTITION, 400 TokenType.PERCENT, 401 TokenType.PIVOT, 402 TokenType.PRAGMA, 403 TokenType.RANGE, 404 TokenType.RECURSIVE, 405 TokenType.REFERENCES, 406 TokenType.REFRESH, 407 TokenType.REPLACE, 408 TokenType.RIGHT, 409 TokenType.ROLLUP, 410 TokenType.ROW, 411 TokenType.ROWS, 412 TokenType.SEMI, 413 TokenType.SET, 414 TokenType.SETTINGS, 415 TokenType.SHOW, 416 TokenType.TEMPORARY, 417 TokenType.TOP, 418 TokenType.TRUE, 419 TokenType.TRUNCATE, 420 TokenType.UNIQUE, 421 TokenType.UNPIVOT, 422 TokenType.UPDATE, 423 TokenType.USE, 424 TokenType.VOLATILE, 425 TokenType.WINDOW, 426 *CREATABLES, 427 *SUBQUERY_PREDICATES, 428 *TYPE_TOKENS, 429 *NO_PAREN_FUNCTIONS, 430 } 431 432 
INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 433 434 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 435 TokenType.ANTI, 436 TokenType.APPLY, 437 TokenType.ASOF, 438 TokenType.FULL, 439 TokenType.LEFT, 440 TokenType.LOCK, 441 TokenType.NATURAL, 442 TokenType.OFFSET, 443 TokenType.RIGHT, 444 TokenType.SEMI, 445 TokenType.WINDOW, 446 } 447 448 ALIAS_TOKENS = ID_VAR_TOKENS 449 450 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 451 452 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 453 454 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 455 456 FUNC_TOKENS = { 457 TokenType.COLLATE, 458 TokenType.COMMAND, 459 TokenType.CURRENT_DATE, 460 TokenType.CURRENT_DATETIME, 461 TokenType.CURRENT_TIMESTAMP, 462 TokenType.CURRENT_TIME, 463 TokenType.CURRENT_USER, 464 TokenType.FILTER, 465 TokenType.FIRST, 466 TokenType.FORMAT, 467 TokenType.GLOB, 468 TokenType.IDENTIFIER, 469 TokenType.INDEX, 470 TokenType.ISNULL, 471 TokenType.ILIKE, 472 TokenType.INSERT, 473 TokenType.LIKE, 474 TokenType.MERGE, 475 TokenType.OFFSET, 476 TokenType.PRIMARY_KEY, 477 TokenType.RANGE, 478 TokenType.REPLACE, 479 TokenType.RLIKE, 480 TokenType.ROW, 481 TokenType.UNNEST, 482 TokenType.VAR, 483 TokenType.LEFT, 484 TokenType.RIGHT, 485 TokenType.SEQUENCE, 486 TokenType.DATE, 487 TokenType.DATETIME, 488 TokenType.TABLE, 489 TokenType.TIMESTAMP, 490 TokenType.TIMESTAMPTZ, 491 TokenType.TRUNCATE, 492 TokenType.WINDOW, 493 TokenType.XOR, 494 *TYPE_TOKENS, 495 *SUBQUERY_PREDICATES, 496 } 497 498 CONJUNCTION = { 499 TokenType.AND: exp.And, 500 TokenType.OR: exp.Or, 501 } 502 503 EQUALITY = { 504 TokenType.EQ: exp.EQ, 505 TokenType.NEQ: exp.NEQ, 506 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 507 } 508 509 COMPARISON = { 510 TokenType.GT: exp.GT, 511 TokenType.GTE: exp.GTE, 512 TokenType.LT: exp.LT, 513 TokenType.LTE: exp.LTE, 514 } 515 516 BITWISE = { 517 TokenType.AMP: exp.BitwiseAnd, 518 TokenType.CARET: exp.BitwiseXor, 519 TokenType.PIPE: exp.BitwiseOr, 520 } 521 522 TERM = { 523 
TokenType.DASH: exp.Sub, 524 TokenType.PLUS: exp.Add, 525 TokenType.MOD: exp.Mod, 526 TokenType.COLLATE: exp.Collate, 527 } 528 529 FACTOR = { 530 TokenType.DIV: exp.IntDiv, 531 TokenType.LR_ARROW: exp.Distance, 532 TokenType.SLASH: exp.Div, 533 TokenType.STAR: exp.Mul, 534 } 535 536 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 537 538 TIMES = { 539 TokenType.TIME, 540 TokenType.TIMETZ, 541 } 542 543 TIMESTAMPS = { 544 TokenType.TIMESTAMP, 545 TokenType.TIMESTAMPTZ, 546 TokenType.TIMESTAMPLTZ, 547 *TIMES, 548 } 549 550 SET_OPERATIONS = { 551 TokenType.UNION, 552 TokenType.INTERSECT, 553 TokenType.EXCEPT, 554 } 555 556 JOIN_METHODS = { 557 TokenType.ASOF, 558 TokenType.NATURAL, 559 TokenType.POSITIONAL, 560 } 561 562 JOIN_SIDES = { 563 TokenType.LEFT, 564 TokenType.RIGHT, 565 TokenType.FULL, 566 } 567 568 JOIN_KINDS = { 569 TokenType.INNER, 570 TokenType.OUTER, 571 TokenType.CROSS, 572 TokenType.SEMI, 573 TokenType.ANTI, 574 } 575 576 JOIN_HINTS: t.Set[str] = set() 577 578 LAMBDAS = { 579 TokenType.ARROW: lambda self, expressions: self.expression( 580 exp.Lambda, 581 this=self._replace_lambda( 582 self._parse_conjunction(), 583 expressions, 584 ), 585 expressions=expressions, 586 ), 587 TokenType.FARROW: lambda self, expressions: self.expression( 588 exp.Kwarg, 589 this=exp.var(expressions[0].name), 590 expression=self._parse_conjunction(), 591 ), 592 } 593 594 COLUMN_OPERATORS = { 595 TokenType.DOT: None, 596 TokenType.DCOLON: lambda self, this, to: self.expression( 597 exp.Cast if self.STRICT_CAST else exp.TryCast, 598 this=this, 599 to=to, 600 ), 601 TokenType.ARROW: lambda self, this, path: self.expression( 602 exp.JSONExtract, 603 this=this, 604 expression=self.dialect.to_json_path(path), 605 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 606 ), 607 TokenType.DARROW: lambda self, this, path: self.expression( 608 exp.JSONExtractScalar, 609 this=this, 610 expression=self.dialect.to_json_path(path), 611 
only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 612 ), 613 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 614 exp.JSONBExtract, 615 this=this, 616 expression=path, 617 ), 618 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 619 exp.JSONBExtractScalar, 620 this=this, 621 expression=path, 622 ), 623 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 624 exp.JSONBContains, 625 this=this, 626 expression=key, 627 ), 628 } 629 630 EXPRESSION_PARSERS = { 631 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 632 exp.Column: lambda self: self._parse_column(), 633 exp.Condition: lambda self: self._parse_conjunction(), 634 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 635 exp.Expression: lambda self: self._parse_expression(), 636 exp.From: lambda self: self._parse_from(joins=True), 637 exp.Group: lambda self: self._parse_group(), 638 exp.Having: lambda self: self._parse_having(), 639 exp.Identifier: lambda self: self._parse_id_var(), 640 exp.Join: lambda self: self._parse_join(), 641 exp.Lambda: lambda self: self._parse_lambda(), 642 exp.Lateral: lambda self: self._parse_lateral(), 643 exp.Limit: lambda self: self._parse_limit(), 644 exp.Offset: lambda self: self._parse_offset(), 645 exp.Order: lambda self: self._parse_order(), 646 exp.Ordered: lambda self: self._parse_ordered(), 647 exp.Properties: lambda self: self._parse_properties(), 648 exp.Qualify: lambda self: self._parse_qualify(), 649 exp.Returning: lambda self: self._parse_returning(), 650 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 651 exp.Table: lambda self: self._parse_table_parts(), 652 exp.TableAlias: lambda self: self._parse_table_alias(), 653 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 654 exp.Where: lambda self: self._parse_where(), 655 exp.Window: lambda self: self._parse_named_window(), 656 exp.With: lambda self: self._parse_with(), 657 "JOIN_TYPE": lambda 
self: self._parse_join_parts(), 658 } 659 660 STATEMENT_PARSERS = { 661 TokenType.ALTER: lambda self: self._parse_alter(), 662 TokenType.BEGIN: lambda self: self._parse_transaction(), 663 TokenType.CACHE: lambda self: self._parse_cache(), 664 TokenType.COMMENT: lambda self: self._parse_comment(), 665 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 666 TokenType.COPY: lambda self: self._parse_copy(), 667 TokenType.CREATE: lambda self: self._parse_create(), 668 TokenType.DELETE: lambda self: self._parse_delete(), 669 TokenType.DESC: lambda self: self._parse_describe(), 670 TokenType.DESCRIBE: lambda self: self._parse_describe(), 671 TokenType.DROP: lambda self: self._parse_drop(), 672 TokenType.INSERT: lambda self: self._parse_insert(), 673 TokenType.KILL: lambda self: self._parse_kill(), 674 TokenType.LOAD: lambda self: self._parse_load(), 675 TokenType.MERGE: lambda self: self._parse_merge(), 676 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 677 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 678 TokenType.REFRESH: lambda self: self._parse_refresh(), 679 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 680 TokenType.SET: lambda self: self._parse_set(), 681 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 682 TokenType.UNCACHE: lambda self: self._parse_uncache(), 683 TokenType.UPDATE: lambda self: self._parse_update(), 684 TokenType.USE: lambda self: self.expression( 685 exp.Use, 686 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 687 this=self._parse_table(schema=False), 688 ), 689 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 690 } 691 692 UNARY_PARSERS = { 693 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 694 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 695 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, 
this=self._parse_unary()), 696 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 697 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 698 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 699 } 700 701 STRING_PARSERS = { 702 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 703 exp.RawString, this=token.text 704 ), 705 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 706 exp.National, this=token.text 707 ), 708 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 709 TokenType.STRING: lambda self, token: self.expression( 710 exp.Literal, this=token.text, is_string=True 711 ), 712 TokenType.UNICODE_STRING: lambda self, token: self.expression( 713 exp.UnicodeString, 714 this=token.text, 715 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 716 ), 717 } 718 719 NUMERIC_PARSERS = { 720 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 721 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 722 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 723 TokenType.NUMBER: lambda self, token: self.expression( 724 exp.Literal, this=token.text, is_string=False 725 ), 726 } 727 728 PRIMARY_PARSERS = { 729 **STRING_PARSERS, 730 **NUMERIC_PARSERS, 731 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 732 TokenType.NULL: lambda self, _: self.expression(exp.Null), 733 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 734 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 735 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 736 TokenType.STAR: lambda self, _: self.expression( 737 exp.Star, 738 **{ 739 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 740 "replace": 
self._parse_star_op("REPLACE"), 741 "rename": self._parse_star_op("RENAME"), 742 }, 743 ), 744 } 745 746 PLACEHOLDER_PARSERS = { 747 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 748 TokenType.PARAMETER: lambda self: self._parse_parameter(), 749 TokenType.COLON: lambda self: ( 750 self.expression(exp.Placeholder, this=self._prev.text) 751 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 752 else None 753 ), 754 } 755 756 RANGE_PARSERS = { 757 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 758 TokenType.GLOB: binary_range_parser(exp.Glob), 759 TokenType.ILIKE: binary_range_parser(exp.ILike), 760 TokenType.IN: lambda self, this: self._parse_in(this), 761 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 762 TokenType.IS: lambda self, this: self._parse_is(this), 763 TokenType.LIKE: binary_range_parser(exp.Like), 764 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 765 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 766 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 767 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 768 } 769 770 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 771 "ALLOWED_VALUES": lambda self: self.expression( 772 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 773 ), 774 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 775 "AUTO": lambda self: self._parse_auto_property(), 776 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 777 "BACKUP": lambda self: self.expression( 778 exp.BackupProperty, this=self._parse_var(any_token=True) 779 ), 780 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 781 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 782 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 783 "CHECKSUM": lambda self: self._parse_checksum(), 784 "CLUSTER BY": lambda self: 
self._parse_cluster(), 785 "CLUSTERED": lambda self: self._parse_clustered_by(), 786 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 787 exp.CollateProperty, **kwargs 788 ), 789 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 790 "CONTAINS": lambda self: self._parse_contains_property(), 791 "COPY": lambda self: self._parse_copy_property(), 792 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 793 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 794 "DEFINER": lambda self: self._parse_definer(), 795 "DETERMINISTIC": lambda self: self.expression( 796 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 797 ), 798 "DISTKEY": lambda self: self._parse_distkey(), 799 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 800 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 801 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 802 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 803 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 804 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 805 "FREESPACE": lambda self: self._parse_freespace(), 806 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 807 "HEAP": lambda self: self.expression(exp.HeapProperty), 808 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 809 "IMMUTABLE": lambda self: self.expression( 810 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 811 ), 812 "INHERITS": lambda self: self.expression( 813 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 814 ), 815 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 816 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 817 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 818 
"LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 819 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 820 "LIKE": lambda self: self._parse_create_like(), 821 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 822 "LOCK": lambda self: self._parse_locking(), 823 "LOCKING": lambda self: self._parse_locking(), 824 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 825 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 826 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 827 "MODIFIES": lambda self: self._parse_modifies_property(), 828 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 829 "NO": lambda self: self._parse_no_property(), 830 "ON": lambda self: self._parse_on_property(), 831 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 832 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 833 "PARTITION": lambda self: self._parse_partitioned_of(), 834 "PARTITION BY": lambda self: self._parse_partitioned_by(), 835 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 836 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 837 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 838 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 839 "READS": lambda self: self._parse_reads_property(), 840 "REMOTE": lambda self: self._parse_remote_with_connection(), 841 "RETURNS": lambda self: self._parse_returns(), 842 "STRICT": lambda self: self.expression(exp.StrictProperty), 843 "ROW": lambda self: self._parse_row(), 844 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 845 "SAMPLE": lambda self: self.expression( 846 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 847 ), 848 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 849 "SETTINGS": lambda self: 
self.expression( 850 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 851 ), 852 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 853 "SORTKEY": lambda self: self._parse_sortkey(), 854 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 855 "STABLE": lambda self: self.expression( 856 exp.StabilityProperty, this=exp.Literal.string("STABLE") 857 ), 858 "STORED": lambda self: self._parse_stored(), 859 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 860 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 861 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 862 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 863 "TO": lambda self: self._parse_to_table(), 864 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 865 "TRANSFORM": lambda self: self.expression( 866 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 867 ), 868 "TTL": lambda self: self._parse_ttl(), 869 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 870 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 871 "VOLATILE": lambda self: self._parse_volatile_property(), 872 "WITH": lambda self: self._parse_with_property(), 873 } 874 875 CONSTRAINT_PARSERS = { 876 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 877 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 878 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 879 "CHARACTER SET": lambda self: self.expression( 880 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 881 ), 882 "CHECK": lambda self: self.expression( 883 exp.CheckColumnConstraint, 884 this=self._parse_wrapped(self._parse_conjunction), 885 enforced=self._match_text_seq("ENFORCED"), 886 ), 887 "COLLATE": lambda self: self.expression( 888 exp.CollateColumnConstraint, this=self._parse_var() 889 
), 890 "COMMENT": lambda self: self.expression( 891 exp.CommentColumnConstraint, this=self._parse_string() 892 ), 893 "COMPRESS": lambda self: self._parse_compress(), 894 "CLUSTERED": lambda self: self.expression( 895 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 896 ), 897 "NONCLUSTERED": lambda self: self.expression( 898 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 899 ), 900 "DEFAULT": lambda self: self.expression( 901 exp.DefaultColumnConstraint, this=self._parse_bitwise() 902 ), 903 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 904 "EPHEMERAL": lambda self: self.expression( 905 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 906 ), 907 "EXCLUDE": lambda self: self.expression( 908 exp.ExcludeColumnConstraint, this=self._parse_index_params() 909 ), 910 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 911 "FORMAT": lambda self: self.expression( 912 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 913 ), 914 "GENERATED": lambda self: self._parse_generated_as_identity(), 915 "IDENTITY": lambda self: self._parse_auto_increment(), 916 "INLINE": lambda self: self._parse_inline(), 917 "LIKE": lambda self: self._parse_create_like(), 918 "NOT": lambda self: self._parse_not_constraint(), 919 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 920 "ON": lambda self: ( 921 self._match(TokenType.UPDATE) 922 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 923 ) 924 or self.expression(exp.OnProperty, this=self._parse_id_var()), 925 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 926 "PERIOD": lambda self: self._parse_period_for_system_time(), 927 "PRIMARY KEY": lambda self: self._parse_primary_key(), 928 "REFERENCES": lambda self: self._parse_references(match=False), 929 "TITLE": lambda self: self.expression( 930 
exp.TitleColumnConstraint, this=self._parse_var_or_string() 931 ), 932 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 933 "UNIQUE": lambda self: self._parse_unique(), 934 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 935 "WITH": lambda self: self.expression( 936 exp.Properties, expressions=self._parse_wrapped_properties() 937 ), 938 } 939 940 ALTER_PARSERS = { 941 "ADD": lambda self: self._parse_alter_table_add(), 942 "ALTER": lambda self: self._parse_alter_table_alter(), 943 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 944 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 945 "DROP": lambda self: self._parse_alter_table_drop(), 946 "RENAME": lambda self: self._parse_alter_table_rename(), 947 "SET": lambda self: self._parse_alter_table_set(), 948 } 949 950 ALTER_ALTER_PARSERS = { 951 "DISTKEY": lambda self: self._parse_alter_diststyle(), 952 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 953 "SORTKEY": lambda self: self._parse_alter_sortkey(), 954 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 955 } 956 957 SCHEMA_UNNAMED_CONSTRAINTS = { 958 "CHECK", 959 "EXCLUDE", 960 "FOREIGN KEY", 961 "LIKE", 962 "PERIOD", 963 "PRIMARY KEY", 964 "UNIQUE", 965 } 966 967 NO_PAREN_FUNCTION_PARSERS = { 968 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 969 "CASE": lambda self: self._parse_case(), 970 "IF": lambda self: self._parse_if(), 971 "NEXT": lambda self: self._parse_next_value_for(), 972 } 973 974 INVALID_FUNC_NAME_TOKENS = { 975 TokenType.IDENTIFIER, 976 TokenType.STRING, 977 } 978 979 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 980 981 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 982 983 FUNCTION_PARSERS = { 984 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 985 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 986 "DECODE": lambda self: 
self._parse_decode(), 987 "EXTRACT": lambda self: self._parse_extract(), 988 "JSON_OBJECT": lambda self: self._parse_json_object(), 989 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 990 "JSON_TABLE": lambda self: self._parse_json_table(), 991 "MATCH": lambda self: self._parse_match_against(), 992 "OPENJSON": lambda self: self._parse_open_json(), 993 "POSITION": lambda self: self._parse_position(), 994 "PREDICT": lambda self: self._parse_predict(), 995 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 996 "STRING_AGG": lambda self: self._parse_string_agg(), 997 "SUBSTRING": lambda self: self._parse_substring(), 998 "TRIM": lambda self: self._parse_trim(), 999 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1000 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1001 } 1002 1003 QUERY_MODIFIER_PARSERS = { 1004 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1005 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1006 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1007 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1008 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1009 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1010 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1011 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1012 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1013 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1014 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1015 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1016 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1017 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1018 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1019 
TokenType.CLUSTER_BY: lambda self: ( 1020 "cluster", 1021 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1022 ), 1023 TokenType.DISTRIBUTE_BY: lambda self: ( 1024 "distribute", 1025 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1026 ), 1027 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1028 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1029 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1030 } 1031 1032 SET_PARSERS = { 1033 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1034 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1035 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1036 "TRANSACTION": lambda self: self._parse_set_transaction(), 1037 } 1038 1039 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1040 1041 TYPE_LITERAL_PARSERS = { 1042 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1043 } 1044 1045 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1046 1047 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1048 1049 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1050 1051 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1052 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1053 "ISOLATION": ( 1054 ("LEVEL", "REPEATABLE", "READ"), 1055 ("LEVEL", "READ", "COMMITTED"), 1056 ("LEVEL", "READ", "UNCOMITTED"), 1057 ("LEVEL", "SERIALIZABLE"), 1058 ), 1059 "READ": ("WRITE", "ONLY"), 1060 } 1061 1062 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1063 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1064 ) 1065 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1066 1067 CREATE_SEQUENCE: OPTIONS_TYPE = { 1068 "SCALE": ("EXTEND", "NOEXTEND"), 1069 "SHARD": ("EXTEND", "NOEXTEND"), 1070 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    # Object kinds accepted by USE (e.g. Snowflake's USE WAREHOUSE)
    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    # INSERT OR <alternative> keywords (SQLite-style conflict handling)
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # Tokens allowed as a window alias; ROWS is excluded because it starts a frame spec
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # Tokens allowed as identifiers in a FETCH clause (ROW/ROWS/PERCENT are keywords there)
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    # --- dialect-tunable flags; subclasses override these per dialect ---

    # Whether CAST raises on failure (vs returning NULL, like TRY_CAST)
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(x) means LN(x) in this dialect
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported locally to avoid a circular import with sqlglot.dialects
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state (tokens, cursor position, collected errors)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1236 """ 1237 return self._parse( 1238 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1239 ) 1240 1241 def parse_into( 1242 self, 1243 expression_types: exp.IntoType, 1244 raw_tokens: t.List[Token], 1245 sql: t.Optional[str] = None, 1246 ) -> t.List[t.Optional[exp.Expression]]: 1247 """ 1248 Parses a list of tokens into a given Expression type. If a collection of Expression 1249 types is given instead, this method will try to parse the token list into each one 1250 of them, stopping at the first for which the parsing succeeds. 1251 1252 Args: 1253 expression_types: The expression type(s) to try and parse the token list into. 1254 raw_tokens: The list of tokens. 1255 sql: The original SQL string, used to produce helpful debug messages. 1256 1257 Returns: 1258 The target Expression. 1259 """ 1260 errors = [] 1261 for expression_type in ensure_list(expression_types): 1262 parser = self.EXPRESSION_PARSERS.get(expression_type) 1263 if not parser: 1264 raise TypeError(f"No parser registered for {expression_type}") 1265 1266 try: 1267 return self._parse(parser, raw_tokens, sql) 1268 except ParseError as e: 1269 e.errors[0]["into_expression"] = expression_type 1270 errors.append(e) 1271 1272 raise ParseError( 1273 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1274 errors=merge_errors(errors), 1275 ) from errors[-1] 1276 1277 def _parse( 1278 self, 1279 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1280 raw_tokens: t.List[Token], 1281 sql: t.Optional[str] = None, 1282 ) -> t.List[t.Optional[exp.Expression]]: 1283 self.reset() 1284 self.sql = sql or "" 1285 1286 total = len(raw_tokens) 1287 chunks: t.List[t.List[Token]] = [[]] 1288 1289 for i, token in enumerate(raw_tokens): 1290 if token.token_type == TokenType.SEMICOLON: 1291 if token.comments: 1292 chunks.append([token]) 1293 1294 if i < total - 1: 1295 chunks.append([]) 1296 else: 1297 chunks[-1].append(token) 1298 1299 expressions = [] 1300 1301 for 
tokens in chunks: 1302 self._index = -1 1303 self._tokens = tokens 1304 self._advance() 1305 1306 expressions.append(parse_method(self)) 1307 1308 if self._index < len(self._tokens): 1309 self.raise_error("Invalid expression / Unexpected token") 1310 1311 self.check_errors() 1312 1313 return expressions 1314 1315 def check_errors(self) -> None: 1316 """Logs or raises any found errors, depending on the chosen error level setting.""" 1317 if self.error_level == ErrorLevel.WARN: 1318 for error in self.errors: 1319 logger.error(str(error)) 1320 elif self.error_level == ErrorLevel.RAISE and self.errors: 1321 raise ParseError( 1322 concat_messages(self.errors, self.max_errors), 1323 errors=merge_errors(self.errors), 1324 ) 1325 1326 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1327 """ 1328 Appends an error in the list of recorded errors or raises it, depending on the chosen 1329 error level setting. 1330 """ 1331 token = token or self._curr or self._prev or Token.string("") 1332 start = token.start 1333 end = token.end + 1 1334 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1335 highlight = self.sql[start:end] 1336 end_context = self.sql[end : end + self.error_message_context] 1337 1338 error = ParseError.new( 1339 f"{message}. Line {token.line}, Col: {token.col}.\n" 1340 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1341 description=message, 1342 line=token.line, 1343 col=token.col, 1344 start_context=start_context, 1345 highlight=highlight, 1346 end_context=end_context, 1347 ) 1348 1349 if self.error_level == ErrorLevel.IMMEDIATE: 1350 raise error 1351 1352 self.errors.append(error) 1353 1354 def expression( 1355 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1356 ) -> E: 1357 """ 1358 Creates a new, validated Expression. 1359 1360 Args: 1361 exp_class: The expression class to instantiate. 1362 comments: An optional list of comments to attach to the expression. 
1363 kwargs: The arguments to set for the expression along with their respective values. 1364 1365 Returns: 1366 The target expression. 1367 """ 1368 instance = exp_class(**kwargs) 1369 instance.add_comments(comments) if comments else self._add_comments(instance) 1370 return self.validate_expression(instance) 1371 1372 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1373 if expression and self._prev_comments: 1374 expression.add_comments(self._prev_comments) 1375 self._prev_comments = None 1376 1377 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1378 """ 1379 Validates an Expression, making sure that all its mandatory arguments are set. 1380 1381 Args: 1382 expression: The expression to validate. 1383 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1384 1385 Returns: 1386 The validated expression. 1387 """ 1388 if self.error_level != ErrorLevel.IGNORE: 1389 for error_message in expression.error_messages(args): 1390 self.raise_error(error_message) 1391 1392 return expression 1393 1394 def _find_sql(self, start: Token, end: Token) -> str: 1395 return self.sql[start.start : end.end + 1] 1396 1397 def _is_connected(self) -> bool: 1398 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1399 1400 def _advance(self, times: int = 1) -> None: 1401 self._index += times 1402 self._curr = seq_get(self._tokens, self._index) 1403 self._next = seq_get(self._tokens, self._index + 1) 1404 1405 if self._index > 0: 1406 self._prev = self._tokens[self._index - 1] 1407 self._prev_comments = self._prev.comments 1408 else: 1409 self._prev = None 1410 self._prev_comments = None 1411 1412 def _retreat(self, index: int) -> None: 1413 if index != self._index: 1414 self._advance(index - self._index) 1415 1416 def _warn_unsupported(self) -> None: 1417 if len(self._tokens) <= 1: 1418 return 1419 1420 # We use _find_sql because self.sql may comprise multiple chunks, 
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wraps the remaining text in an opaque Command node (unsupported-syntax fallback)."""
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any parse failure surfaces as an exception we can catch here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <object> IS <string> (falls back to a Command)."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        # The commented-on object is parsed according to its kind
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (expressions + optional WHERE/GROUP BY)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatcher: keyword parsers, then commands, then expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()
        # Fall back to parsing a bare expression / SELECT with query modifiers
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP <kind> [IF EXISTS] <name> and trailing options (CASCADE, PURGE, ...)."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown droppable kind -> opaque Command fallback
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] <kind> ... for functions, indexes, tables, views, etc."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the create token
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Leftover tokens (other than ')' or ',' in nested contexts) -> Command fallback
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parses CREATE SEQUENCE options; returns None if nothing was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If the cursor didn't move, no sequence properties were present
        return None if self._index == index else seq
_parse_property_before(self) -> t.Optional[exp.Expression]: 1776 # only used for teradata currently 1777 self._match(TokenType.COMMA) 1778 1779 kwargs = { 1780 "no": self._match_text_seq("NO"), 1781 "dual": self._match_text_seq("DUAL"), 1782 "before": self._match_text_seq("BEFORE"), 1783 "default": self._match_text_seq("DEFAULT"), 1784 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1785 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1786 "after": self._match_text_seq("AFTER"), 1787 "minimum": self._match_texts(("MIN", "MINIMUM")), 1788 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1789 } 1790 1791 if self._match_texts(self.PROPERTY_PARSERS): 1792 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1793 try: 1794 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1795 except TypeError: 1796 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1797 1798 return None 1799 1800 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1801 return self._parse_wrapped_csv(self._parse_property) 1802 1803 def _parse_property(self) -> t.Optional[exp.Expression]: 1804 if self._match_texts(self.PROPERTY_PARSERS): 1805 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1806 1807 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1808 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1809 1810 if self._match_text_seq("COMPOUND", "SORTKEY"): 1811 return self._parse_sortkey(compound=True) 1812 1813 if self._match_text_seq("SQL", "SECURITY"): 1814 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1815 1816 index = self._index 1817 key = self._parse_column() 1818 1819 if not self._match(TokenType.EQ): 1820 self._retreat(index) 1821 return self._parse_sequence_properties() 1822 1823 return self.expression( 1824 exp.Property, 1825 this=key.to_dot() if isinstance(key, exp.Column) else key, 1826 value=self._parse_bitwise() or 
self._parse_var(any_token=True), 1827 ) 1828 1829 def _parse_stored(self) -> exp.FileFormatProperty: 1830 self._match(TokenType.ALIAS) 1831 1832 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1833 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1834 1835 return self.expression( 1836 exp.FileFormatProperty, 1837 this=( 1838 self.expression( 1839 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1840 ) 1841 if input_format or output_format 1842 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1843 ), 1844 ) 1845 1846 def _parse_unquoted_field(self): 1847 field = self._parse_field() 1848 if isinstance(field, exp.Identifier) and not field.quoted: 1849 field = exp.var(field) 1850 1851 return field 1852 1853 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1854 self._match(TokenType.EQ) 1855 self._match(TokenType.ALIAS) 1856 1857 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1858 1859 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1860 properties = [] 1861 while True: 1862 if before: 1863 prop = self._parse_property_before() 1864 else: 1865 prop = self._parse_property() 1866 if not prop: 1867 break 1868 for p in ensure_list(prop): 1869 properties.append(p) 1870 1871 if properties: 1872 return self.expression(exp.Properties, expressions=properties) 1873 1874 return None 1875 1876 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1877 return self.expression( 1878 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1879 ) 1880 1881 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1882 if self._index >= 2: 1883 pre_volatile_token = self._tokens[self._index - 2] 1884 else: 1885 pre_volatile_token = None 1886 1887 if pre_volatile_token and 
pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1888 return exp.VolatileProperty() 1889 1890 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1891 1892 def _parse_retention_period(self) -> exp.Var: 1893 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1894 number = self._parse_number() 1895 number_str = f"{number} " if number else "" 1896 unit = self._parse_var(any_token=True) 1897 return exp.var(f"{number_str}{unit}") 1898 1899 def _parse_system_versioning_property( 1900 self, with_: bool = False 1901 ) -> exp.WithSystemVersioningProperty: 1902 self._match(TokenType.EQ) 1903 prop = self.expression( 1904 exp.WithSystemVersioningProperty, 1905 **{ # type: ignore 1906 "on": True, 1907 "with": with_, 1908 }, 1909 ) 1910 1911 if self._match_text_seq("OFF"): 1912 prop.set("on", False) 1913 return prop 1914 1915 self._match(TokenType.ON) 1916 if self._match(TokenType.L_PAREN): 1917 while self._curr and not self._match(TokenType.R_PAREN): 1918 if self._match_text_seq("HISTORY_TABLE", "="): 1919 prop.set("this", self._parse_table_parts()) 1920 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1921 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1922 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1923 prop.set("retention_period", self._parse_retention_period()) 1924 1925 self._match(TokenType.COMMA) 1926 1927 return prop 1928 1929 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1930 self._match(TokenType.EQ) 1931 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1932 prop = self.expression(exp.DataDeletionProperty, on=on) 1933 1934 if self._match(TokenType.L_PAREN): 1935 while self._curr and not self._match(TokenType.R_PAREN): 1936 if self._match_text_seq("FILTER_COLUMN", "="): 1937 prop.set("filter_column", self._parse_column()) 1938 elif self._match_text_seq("RETENTION_PERIOD", "="): 1939 
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the various `WITH <property>` forms to their parsers."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse MySQL's `DEFINER = user@host`; None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may be a bare `%` wildcard rather than an identifier
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse Teradata's `WITH JOURNAL TABLE = <table>`."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a [NO] LOG property (keyword already consumed by the caller)."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from flags supplied by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse `CHECKSUM = {ON | OFF | DEFAULT}`."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """Parse a CLUSTER BY list, optionally parenthesized."""
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive's `CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS`."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse `COPY GRANTS`; backtrack over COPY if GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse Teradata's `FREESPACE = <number> [PERCENT]`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse `MERGEBLOCKRATIO = n [PERCENT]` or the bare [NO|DEFAULT] form."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse Teradata's `[MINIMUM|MAXIMUM|DEFAULT] DATABLOCKSIZE [= n [units]]`."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse Teradata's `BLOCKCOMPRESSION = {ALWAYS|MANUAL|NEVER|DEFAULT|AUTOTEMP(...)}`."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse `[NO] [CONCURRENT] ISOLATED LOADING [<target>]`, backtracking fully
        when the mandatory ISOLATED LOADING keywords are absent."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata's LOCKING clause: kind, object, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY expression list; [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...), or
        WITH (MODULUS n, REMAINDER m). Raises on any other input."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords inside range bounds, not expressions
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
self._match_text_seq("TO") 2197 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2198 elif self._match_text_seq("WITH", "(", "MODULUS"): 2199 this = self._parse_number() 2200 self._match_text_seq(",", "REMAINDER") 2201 expression = self._parse_number() 2202 self._match_r_paren() 2203 else: 2204 self.raise_error("Failed to parse partition bound spec.") 2205 2206 return self.expression( 2207 exp.PartitionBoundSpec, 2208 this=this, 2209 expression=expression, 2210 from_expressions=from_expressions, 2211 to_expressions=to_expressions, 2212 ) 2213 2214 # https://www.postgresql.org/docs/current/sql-createtable.html 2215 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2216 if not self._match_text_seq("OF"): 2217 self._retreat(self._index - 1) 2218 return None 2219 2220 this = self._parse_table(schema=True) 2221 2222 if self._match(TokenType.DEFAULT): 2223 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2224 elif self._match_text_seq("FOR", "VALUES"): 2225 expression = self._parse_partition_bound_spec() 2226 else: 2227 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2228 2229 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2230 2231 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2232 self._match(TokenType.EQ) 2233 return self.expression( 2234 exp.PartitionedByProperty, 2235 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2236 ) 2237 2238 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2239 if self._match_text_seq("AND", "STATISTICS"): 2240 statistics = True 2241 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2242 statistics = False 2243 else: 2244 statistics = None 2245 2246 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2247 2248 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2249 if self._match_text_seq("SQL"): 2250 return 
self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2251 return None 2252 2253 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2254 if self._match_text_seq("SQL", "DATA"): 2255 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2256 return None 2257 2258 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2259 if self._match_text_seq("PRIMARY", "INDEX"): 2260 return exp.NoPrimaryIndexProperty() 2261 if self._match_text_seq("SQL"): 2262 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2263 return None 2264 2265 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2266 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2267 return exp.OnCommitProperty() 2268 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2269 return exp.OnCommitProperty(delete=True) 2270 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2271 2272 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2273 if self._match_text_seq("SQL", "DATA"): 2274 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2275 return None 2276 2277 def _parse_distkey(self) -> exp.DistKeyProperty: 2278 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2279 2280 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2281 table = self._parse_table(schema=True) 2282 2283 options = [] 2284 while self._match_texts(("INCLUDING", "EXCLUDING")): 2285 this = self._prev.text.upper() 2286 2287 id_var = self._parse_id_var() 2288 if not id_var: 2289 return None 2290 2291 options.append( 2292 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2293 ) 2294 2295 return self.expression(exp.LikeProperty, this=table, expressions=options) 2296 2297 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2298 return self.expression( 2299 exp.SortKeyProperty, 
    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <name>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse BigQuery's `REMOTE WITH CONNECTION <connection>` model property."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a function's RETURNS clause: TABLE<...>, TABLE(<schema>),
        NULL ON NULL INPUT, or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (BigQuery-style generic syntax)
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [EXTENDED|FORMATTED|HISTORY] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # What looked like a style keyword was the first part of a dotted
            # table name, so back up over it and re-parse as a table
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (INSERT token already consumed).

        Handles OVERWRITE/IGNORE modifiers, Hive's INSERT ... DIRECTORY,
        `INSERT OR <alternative>`, function targets, and trailing clauses
        (STORED, BY NAME, IF EXISTS, REPLACE WHERE, ON CONFLICT, RETURNING).
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse MySQL's `KILL [CONNECTION | QUERY] <id>`."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` (Postgres) or `ON DUPLICATE KEY ...` (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse `RETURNING <exprs> [INTO <target>]`."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of ROW FORMAT (ROW already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse `[WITH] SERDEPROPERTIES (...)`, backtracking when absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive's ROW FORMAT: SERDE '<class>' [...] or DELIMITED [options]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and order-sensitive
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's `LOAD DATA [LOCAL] INPATH ...`; anything else becomes a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (UPDATE token already consumed)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse Spark's `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark's `CACHE [LAZY] TABLE <table> [OPTIONS(...)] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<exprs>)`."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one row of a VALUES clause as a Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (hook point for dialect overrides)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, SELECT proper, parenthesized
        subqueries/tables, VALUES, or a bare leading FROM (duckdb)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> [NOT MATERIALIZED | MATERIALIZED] AS (<stmt>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(<columns>)]`; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesized list wasn't a column list; back it out
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery with optional trailing pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined tables that actually reference prior FROM aliases
        into explicit UNNEST calls (e.g. BigQuery's implicit unnest syntax)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and registered query modifiers (WHERE, GROUP BY,
        LIMIT, ...) to a Query or Table node; other nodes pass through unchanged."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT with an embedded offset becomes a separate Offset node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT BY expressions move over to the Offset node
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block `/*+ ... */` into a Hint node."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY | UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `joins` also consumes trailing joins."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: `[FINAL | RUNNING] <expr>`."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is regex-like, so capture its raw token span verbatim
            # by tracking parenthesis depth rather than parsing it
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
self._match_r_paren() 2984 2985 return self.expression( 2986 exp.MatchRecognize, 2987 partition_by=partition, 2988 order=order, 2989 measures=measures, 2990 rows=rows, 2991 after=after, 2992 pattern=pattern, 2993 define=define, 2994 alias=self._parse_table_alias(), 2995 ) 2996 2997 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2998 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2999 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3000 cross_apply = False 3001 3002 if cross_apply is not None: 3003 this = self._parse_select(table=True) 3004 view = None 3005 outer = None 3006 elif self._match(TokenType.LATERAL): 3007 this = self._parse_select(table=True) 3008 view = self._match(TokenType.VIEW) 3009 outer = self._match(TokenType.OUTER) 3010 else: 3011 return None 3012 3013 if not this: 3014 this = ( 3015 self._parse_unnest() 3016 or self._parse_function() 3017 or self._parse_id_var(any_token=False) 3018 ) 3019 3020 while self._match(TokenType.DOT): 3021 this = exp.Dot( 3022 this=this, 3023 expression=self._parse_function() or self._parse_id_var(any_token=False), 3024 ) 3025 3026 if view: 3027 table = self._parse_id_var(any_token=False) 3028 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3029 table_alias: t.Optional[exp.TableAlias] = self.expression( 3030 exp.TableAlias, this=table, columns=columns 3031 ) 3032 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3033 # We move the alias from the lateral's child node to the lateral itself 3034 table_alias = this.args["alias"].pop() 3035 else: 3036 table_alias = self._parse_table_alias() 3037 3038 return self.expression( 3039 exp.Lateral, 3040 this=this, 3041 view=view, 3042 outer=outer, 3043 alias=table_alias, 3044 cross_apply=cross_apply, 3045 ) 3046 3047 def _parse_join_parts( 3048 self, 3049 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3050 return ( 3051 self._match_set(self.JOIN_METHODS) 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional (method, side, kind) join prefix tokens.

        Each slot is the matched Token, or False/None-ish when absent
        (``_match_set(...) and self._prev`` yields the token only on a match).
        """
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY), or None."""
        if self._match(TokenType.COMMA):
            # Implicit cross join: "FROM a, b"
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The prefix tokens were not followed by JOIN; rewind and discard.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Nested joins: "a JOIN (b JOIN c) ON ..." — attach trailing joins
            # to the joined table only if an ON/USING condition follows.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of a CREATE INDEX statement.

        Covers USING <method>, the column list, INCLUDE, PARTITION BY,
        WITH (<storage opts>), USING INDEX TABLESPACE, and WHERE.
        """
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into `exp.Index`.

        When `index` is pre-parsed or `anonymous` is set, only the target
        table is parsed; otherwise the UNIQUE/PRIMARY/AMP prefix and the
        INDEX keyword plus index name are expected.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL ``WITH (...)`` or MySQL USE/FORCE/IGNORE index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name into `exp.Table`.

        Handles catalog.db.table chains (and deeper nesting via exp.Dot),
        tsql's ``a..b`` empty-part form, and an optional trailing wildcard.
        With `is_db_reference`, the parts shift one slot so the last part is
        the database rather than the table.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: lateral, unnest, VALUES, subquery,
        or a plain table reference with its version/alias/hints/pivots/sample.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            # Some dialects place TABLESAMPLE before the alias
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3371 this = "TIMESTAMP" 3372 elif self._match(TokenType.VERSION_SNAPSHOT): 3373 this = "VERSION" 3374 else: 3375 return None 3376 3377 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3378 kind = self._prev.text.upper() 3379 start = self._parse_bitwise() 3380 self._match_texts(("TO", "AND")) 3381 end = self._parse_bitwise() 3382 expression: t.Optional[exp.Expression] = self.expression( 3383 exp.Tuple, expressions=[start, end] 3384 ) 3385 elif self._match_text_seq("CONTAINED", "IN"): 3386 kind = "CONTAINED IN" 3387 expression = self.expression( 3388 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3389 ) 3390 elif self._match(TokenType.ALL): 3391 kind = "ALL" 3392 expression = None 3393 else: 3394 self._match_text_seq("AS", "OF") 3395 kind = "AS OF" 3396 expression = self._parse_type() 3397 3398 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3399 3400 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3401 if not self._match(TokenType.UNNEST): 3402 return None 3403 3404 expressions = self._parse_wrapped_csv(self._parse_equality) 3405 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3406 3407 alias = self._parse_table_alias() if with_alias else None 3408 3409 if alias: 3410 if self.dialect.UNNEST_COLUMN_ONLY: 3411 if alias.args.get("columns"): 3412 self.raise_error("Unexpected extra column alias in unnest.") 3413 3414 alias.set("columns", [alias.this]) 3415 alias.set("this", None) 3416 3417 columns = alias.args.get("columns") or [] 3418 if offset and len(expressions) < len(columns): 3419 offset = columns.pop() 3420 3421 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3422 self._match(TokenType.ALIAS) 3423 offset = self._parse_id_var( 3424 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3425 ) or exp.to_identifier("offset") 3426 3427 return self.expression(exp.Unnest, expressions=expressions, 
alias=alias, offset=offset) 3428 3429 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3430 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3431 if not is_derived and not self._match_text_seq("VALUES"): 3432 return None 3433 3434 expressions = self._parse_csv(self._parse_value) 3435 alias = self._parse_table_alias() 3436 3437 if is_derived: 3438 self._match_r_paren() 3439 3440 return self.expression( 3441 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3442 ) 3443 3444 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3445 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3446 as_modifier and self._match_text_seq("USING", "SAMPLE") 3447 ): 3448 return None 3449 3450 bucket_numerator = None 3451 bucket_denominator = None 3452 bucket_field = None 3453 percent = None 3454 size = None 3455 seed = None 3456 3457 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3458 matched_l_paren = self._match(TokenType.L_PAREN) 3459 3460 if self.TABLESAMPLE_CSV: 3461 num = None 3462 expressions = self._parse_csv(self._parse_primary) 3463 else: 3464 expressions = None 3465 num = ( 3466 self._parse_factor() 3467 if self._match(TokenType.NUMBER, advance=False) 3468 else self._parse_primary() or self._parse_placeholder() 3469 ) 3470 3471 if self._match_text_seq("BUCKET"): 3472 bucket_numerator = self._parse_number() 3473 self._match_text_seq("OUT", "OF") 3474 bucket_denominator = bucket_denominator = self._parse_number() 3475 self._match(TokenType.ON) 3476 bucket_field = self._parse_field() 3477 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3478 percent = num 3479 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3480 size = num 3481 else: 3482 percent = num 3483 3484 if matched_l_paren: 3485 self._match_r_paren() 3486 3487 if self._match(TokenType.L_PAREN): 3488 method = self._parse_var(upper=True) 3489 seed = 
self._match(TokenType.COMMA) and self._parse_number() 3490 self._match_r_paren() 3491 elif self._match_texts(("SEED", "REPEATABLE")): 3492 seed = self._parse_wrapped(self._parse_number) 3493 3494 if not method and self.DEFAULT_SAMPLING_METHOD: 3495 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3496 3497 return self.expression( 3498 exp.TableSample, 3499 expressions=expressions, 3500 method=method, 3501 bucket_numerator=bucket_numerator, 3502 bucket_denominator=bucket_denominator, 3503 bucket_field=bucket_field, 3504 percent=percent, 3505 size=size, 3506 seed=seed, 3507 ) 3508 3509 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3510 return list(iter(self._parse_pivot, None)) or None 3511 3512 def _parse_joins(self) -> t.Iterator[exp.Join]: 3513 return iter(self._parse_join, None) 3514 3515 # https://duckdb.org/docs/sql/statements/pivot 3516 def _parse_simplified_pivot(self) -> exp.Pivot: 3517 def _parse_on() -> t.Optional[exp.Expression]: 3518 this = self._parse_bitwise() 3519 return self._parse_in(this) if self._match(TokenType.IN) else this 3520 3521 this = self._parse_table() 3522 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3523 using = self._match(TokenType.USING) and self._parse_csv( 3524 lambda: self._parse_alias(self._parse_function()) 3525 ) 3526 group = self._parse_group() 3527 return self.expression( 3528 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3529 ) 3530 3531 def _parse_pivot_in(self) -> exp.In: 3532 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3533 this = self._parse_conjunction() 3534 3535 self._match(TokenType.ALIAS) 3536 alias = self._parse_field() 3537 if alias: 3538 return self.expression(exp.PivotAlias, this=this, alias=alias) 3539 3540 return this 3541 3542 value = self._parse_column() 3543 3544 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3545 self.raise_error("Expecting IN (") 3546 3547 aliased_expressions = 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one PIVOT/UNPIVOT clause, or return None.

        For PIVOT, also synthesizes the output column names from the
        aggregation aliases and the IN-list values.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Bare PIVOT/UNPIVOT keyword without a body: not a pivot clause
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse-style PREWHERE clause, or return None."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with ALL/DISTINCT, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS, accumulating the pieces across iterations.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_conjunction()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # "WITH ROLLUP" has no column list; bare ROLLUP takes one
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH belonged to a following clause; rewind past it
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ``GROUPING SETS (...)``, or return None."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only a prefix operator inside CONNECT BY, so the parser
        # for it is installed temporarily and removed right after.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse ``<name> AS <expr>`` (name first) into an `exp.Alias`."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list (ClickHouse ORDER BY), or None."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged
        when neither keyword is present.
        """
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY item with ASC/DESC, NULLS FIRST/LAST and
        ClickHouse WITH FILL, normalizing implicit null ordering per dialect.
        """
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # "(asc and False)" forces desc to be False (not None) when ASC matched
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP (when `top`) / FETCH, or return `this` unchanged.

        Handles MySQL's "LIMIT offset, count" comma form and the trailing
        LIMIT ... BY list.
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # "LIMIT x, y" means offset x, count y
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause, or return `this` unchanged."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the BY list of ClickHouse's LIMIT ... BY, if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN
        SHARE MODE) with their OF lists and NOWAIT/WAIT/SKIP LOCKED options.
        """
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is tri-state: True=NOWAIT, expression=WAIT n, False=SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
TokenType.EXCEPT: 3919 operation = exp.Except 3920 else: 3921 operation = exp.Intersect 3922 3923 comments = self._prev.comments 3924 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3925 by_name = self._match_text_seq("BY", "NAME") 3926 expression = self._parse_select(nested=True, parse_set_operation=False) 3927 3928 this = self.expression( 3929 operation, 3930 comments=comments, 3931 this=this, 3932 distinct=distinct, 3933 by_name=by_name, 3934 expression=expression, 3935 ) 3936 3937 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3938 expression = this.expression 3939 3940 if expression: 3941 for arg in self.UNION_MODIFIERS: 3942 expr = expression.args.get(arg) 3943 if expr: 3944 this.set(arg, expr.pop()) 3945 3946 return this 3947 3948 def _parse_expression(self) -> t.Optional[exp.Expression]: 3949 return self._parse_alias(self._parse_conjunction()) 3950 3951 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3952 this = self._parse_equality() 3953 3954 if self._match(TokenType.COLON_EQ): 3955 this = self.expression( 3956 exp.PropertyEQ, 3957 this=this, 3958 comments=self._prev_comments, 3959 expression=self._parse_conjunction(), 3960 ) 3961 3962 while self._match_set(self.CONJUNCTION): 3963 this = self.expression( 3964 self.CONJUNCTION[self._prev.token_type], 3965 this=this, 3966 comments=self._prev_comments, 3967 expression=self._parse_equality(), 3968 ) 3969 return this 3970 3971 def _parse_equality(self) -> t.Optional[exp.Expression]: 3972 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3973 3974 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3975 return self._parse_tokens(self._parse_range, self.COMPARISON) 3976 3977 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3978 this = this or self._parse_bitwise() 3979 negate = self._match(TokenType.NOT) 3980 3981 if self._match_set(self.RANGE_PARSERS): 3982 expression = 
self.RANGE_PARSERS[self._prev.token_type](self, this) 3983 if not expression: 3984 return this 3985 3986 this = expression 3987 elif self._match(TokenType.ISNULL): 3988 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3989 3990 # Postgres supports ISNULL and NOTNULL for conditions. 3991 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3992 if self._match(TokenType.NOTNULL): 3993 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3994 this = self.expression(exp.Not, this=this) 3995 3996 if negate: 3997 this = self.expression(exp.Not, this=this) 3998 3999 if self._match(TokenType.IS): 4000 this = self._parse_is(this) 4001 4002 return this 4003 4004 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4005 index = self._index - 1 4006 negate = self._match(TokenType.NOT) 4007 4008 if self._match_text_seq("DISTINCT", "FROM"): 4009 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4010 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4011 4012 expression = self._parse_null() or self._parse_boolean() 4013 if not expression: 4014 self._retreat(index) 4015 return None 4016 4017 this = self.expression(exp.Is, this=this, expression=expression) 4018 return self.expression(exp.Not, this=this) if negate else this 4019 4020 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4021 unnest = self._parse_unnest(with_alias=False) 4022 if unnest: 4023 this = self.expression(exp.In, this=this, unnest=unnest) 4024 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4025 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4026 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4027 4028 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4029 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4030 else: 4031 this = self.expression(exp.In, this=this, 
def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
    """Parse `<this> BETWEEN <low> AND <high>`; the BETWEEN keyword has
    already been consumed by the caller."""
    lower_bound = self._parse_bitwise()
    # This AND separates the two bounds - it is not a boolean conjunction.
    self._match(TokenType.AND)
    upper_bound = self._parse_bitwise()

    return self.expression(exp.Between, this=this, low=lower_bound, high=upper_bound)
a "window side") 4088 unit = None 4089 self._retreat(self._index - 1) 4090 4091 this = exp.Literal.string(parts[0]) 4092 unit = self.expression(exp.Var, this=parts[1].upper()) 4093 4094 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4095 unit = self.expression( 4096 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4097 ) 4098 4099 return self.expression(exp.Interval, this=this, unit=unit) 4100 4101 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4102 this = self._parse_term() 4103 4104 while True: 4105 if self._match_set(self.BITWISE): 4106 this = self.expression( 4107 self.BITWISE[self._prev.token_type], 4108 this=this, 4109 expression=self._parse_term(), 4110 ) 4111 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4112 this = self.expression( 4113 exp.DPipe, 4114 this=this, 4115 expression=self._parse_term(), 4116 safe=not self.dialect.STRICT_STRING_CONCAT, 4117 ) 4118 elif self._match(TokenType.DQMARK): 4119 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4120 elif self._match_pair(TokenType.LT, TokenType.LT): 4121 this = self.expression( 4122 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4123 ) 4124 elif self._match_pair(TokenType.GT, TokenType.GT): 4125 this = self.expression( 4126 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4127 ) 4128 else: 4129 break 4130 4131 return this 4132 4133 def _parse_term(self) -> t.Optional[exp.Expression]: 4134 return self._parse_tokens(self._parse_factor, self.TERM) 4135 4136 def _parse_factor(self) -> t.Optional[exp.Expression]: 4137 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4138 this = parse_method() 4139 4140 while self._match_set(self.FACTOR): 4141 this = self.expression( 4142 self.FACTOR[self._prev.token_type], 4143 this=this, 4144 comments=self._prev_comments, 4145 expression=parse_method(), 4146 ) 4147 if isinstance(this, exp.Div): 4148 
this.args["typed"] = self.dialect.TYPED_DIVISION 4149 this.args["safe"] = self.dialect.SAFE_DIVISION 4150 4151 return this 4152 4153 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4154 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4155 4156 def _parse_unary(self) -> t.Optional[exp.Expression]: 4157 if self._match_set(self.UNARY_PARSERS): 4158 return self.UNARY_PARSERS[self._prev.token_type](self) 4159 return self._parse_at_time_zone(self._parse_type()) 4160 4161 def _parse_type( 4162 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4163 ) -> t.Optional[exp.Expression]: 4164 interval = parse_interval and self._parse_interval() 4165 if interval: 4166 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4167 while True: 4168 index = self._index 4169 self._match(TokenType.PLUS) 4170 4171 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4172 self._retreat(index) 4173 break 4174 4175 interval = self.expression( # type: ignore 4176 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4177 ) 4178 4179 return interval 4180 4181 index = self._index 4182 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4183 this = self._parse_column() 4184 4185 if data_type: 4186 if isinstance(this, exp.Literal): 4187 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4188 if parser: 4189 return parser(self, this, data_type) 4190 return self.expression(exp.Cast, this=this, to=data_type) 4191 4192 if not data_type.expressions: 4193 self._retreat(index) 4194 return self._parse_id_var() if fallback_to_identifier else self._parse_column() 4195 4196 return self._parse_column_ops(data_type) 4197 4198 return this and self._parse_column_ops(this) 4199 4200 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4201 this = self._parse_type() 4202 if not this: 4203 return None 4204 4205 if isinstance(this, exp.Column) and not this.table: 
def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
    """Parse a (possibly nested/parameterized) data type into an exp.DataType.

    Handles user-defined types, STRUCT/ENUM/aggregate parameter lists,
    `<...>` nested-type syntax, TIMESTAMP/TIME time-zone modifiers, INTERVAL
    units, UNSIGNED integer variants, and trailing `[]` array suffixes.

    Args:
        check_func: when True, a parenthesized form that could also be a
            function call (e.g. DECIMAL(1)) is rejected unless a string
            literal follows (i.e. it is being used as a cast target).
        schema: propagated to nested parses; affects struct member parsing.
        allow_identifiers: when True, an identifier may be re-tokenized and
            treated as a type name (or a UDT if the dialect supports them).

    Returns None (after rewinding) when no type can be parsed.
    """
    index = self._index

    this: t.Optional[exp.Expression] = None
    # Teradata-style SYSUDTLIB. prefix before the type name.
    prefix = self._match_text_seq("SYSUDTLIB", ".")

    if not self._match_set(self.TYPE_TOKENS):
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )
        if identifier:
            # Re-tokenize the identifier text: a quoted name like "INT" may
            # still denote a type.
            tokens = self.dialect.tokenize(identifier.name)

            if len(tokens) != 1:
                self.raise_error("Unexpected identifier", self._prev)

            if tokens[0].token_type in self.TYPE_TOKENS:
                self._prev = tokens[0]
            elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                type_name = identifier.name

                # Consume dotted qualifiers of a user-defined type name.
                while self._match(TokenType.DOT):
                    type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                this = exp.DataType.build(type_name, udt=True)
            else:
                self._retreat(self._index - 1)
                return None
        else:
            return None

    type_token = self._prev.token_type

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType, this=self._prev.text.upper())

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
    expressions = None
    maybe_func = False

    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_types)
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
        elif type_token in self.ENUM_TYPE_TOKENS:
            # ENUM members can be expressions like 'a' = 1.
            expressions = self._parse_csv(self._parse_equality)
        elif is_aggregate:
            # e.g. AggregateFunction(avg, Float64): first arg is a function
            # or name, the rest are types.
            func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if not func_or_ident or not self._match(TokenType.COMMA):
                return None
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
            expressions.insert(0, func_or_ident)
        else:
            expressions = self._parse_csv(self._parse_type_size)

        if not expressions or not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        # A parenthesized form might actually be a function call - decided
        # further below when check_func is set.
        maybe_func = True

    values: t.Optional[t.List[exp.Expression]] = None

    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_conjunction)
            self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = (
                exp.DataType.Type.TIMETZ
                if type_token in self.TIMES
                else exp.DataType.Type.TIMESTAMPTZ
            )
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            # WITHOUT TIME ZONE is the default; just stop treating it as a call.
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        unit = self._parse_var(upper=True)
        if unit:
            if self._match_text_seq("TO"):
                unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

            this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
        else:
            this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

    if maybe_func and check_func:
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            # No string literal follows, so this was a function call, not a type.
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

            type_token = unsigned_type_token or type_token

        this = exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
    elif expressions:
        this.set("expressions", expressions)

    # Trailing [] suffixes wrap the type in ARRAY, one level per pair.
    while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
        this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

    if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type):
        converter = self.TYPE_CONVERTER.get(this.this)
        if converter:
            this = converter(t.cast(exp.DataType, this))

    return this
def _parse_column_reference(self) -> t.Optional[exp.Expression]:
    """Parse a field and promote a bare identifier to a Column node."""
    node = self._parse_field()

    if node is None and self._match(TokenType.VALUES, advance=False):
        # VALUES may act as a plain identifier in dialects where it is only a
        # keyword when directly followed by a parenthesized tuple list.
        followed_by_paren = self._next and self._next.token_type == TokenType.L_PAREN
        if self.VALUES_FOLLOWED_BY_PAREN and not followed_by_paren:
            node = self._parse_id_var()

    if isinstance(node, exp.Identifier):
        # We bubble up comments from the Identifier to the Column
        node = self.expression(exp.Column, comments=node.pop_comments(), this=node)

    return node
def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold trailing column operators (dots, ::, brackets, and dialect-specific
    COLUMN_OPERATORS) onto an already-parsed expression."""
    this = self._parse_bracket(this)

    while self._match_set(self.COLUMN_OPERATORS):
        op_token = self._prev.token_type
        op = self.COLUMN_OPERATORS.get(op_token)

        if op_token == TokenType.DCOLON:
            # `expr::type` - the right-hand side must parse as a type.
            field = self._parse_types()
            if not field:
                self.raise_error("Expected type")
        elif op and self._curr:
            field = self._parse_column_reference()
        else:
            field = self._parse_field(any_token=True, anonymous_func=True)

        if isinstance(field, exp.Func) and this:
            # bigquery allows function calls like x.y.count(...)
            # SAFE.SUBSTR(...)
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            # Rewrite the already-parsed Column chain into Dot nodes so the
            # function keeps its dotted qualification.
            this = exp.replace_tree(
                this,
                lambda n: (
                    self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                    if n.table
                    else n.this
                )
                if isinstance(n, exp.Column)
                else n,
            )

        if op:
            this = op(self, this, field)
        elif isinstance(this, exp.Column) and not this.args.get("catalog"):
            # One more dotted part: shift existing parts one slot leftwards
            # (column -> table -> db -> catalog) and make `field` the column.
            this = self.expression(
                exp.Column,
                this=field,
                table=this.this,
                db=this.args.get("table"),
                catalog=this.args.get("db"),
            )
        else:
            this = self.expression(exp.Dot, this=this, expression=field)

        this = self._parse_bracket(this)

    return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this
def _parse_field(
    self,
    any_token: bool = False,
    tokens: t.Optional[t.Collection[TokenType]] = None,
    anonymous_func: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a field: a primary, a function call, or (failing both) an
    identifier/variable.

    When `anonymous_func` is set, function calls are tried before primaries,
    so unknown names with parentheses parse as anonymous functions.
    """
    if anonymous_func:
        candidate = self._parse_function(anonymous=anonymous_func, any_token=any_token)
        if candidate is None:
            candidate = self._parse_primary()
    else:
        candidate = self._parse_primary()
        if candidate is None:
            candidate = self._parse_function(anonymous=anonymous_func, any_token=any_token)

    return candidate or self._parse_id_var(any_token=any_token, tokens=tokens)
bool = False, 4604 optional_parens: bool = True, 4605 any_token: bool = False, 4606 ) -> t.Optional[exp.Expression]: 4607 if not self._curr: 4608 return None 4609 4610 comments = self._curr.comments 4611 token_type = self._curr.token_type 4612 this = self._curr.text 4613 upper = this.upper() 4614 4615 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4616 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4617 self._advance() 4618 return self._parse_window(parser(self)) 4619 4620 if not self._next or self._next.token_type != TokenType.L_PAREN: 4621 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4622 self._advance() 4623 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4624 4625 return None 4626 4627 if any_token: 4628 if token_type in self.RESERVED_TOKENS: 4629 return None 4630 elif token_type not in self.FUNC_TOKENS: 4631 return None 4632 4633 self._advance(2) 4634 4635 parser = self.FUNCTION_PARSERS.get(upper) 4636 if parser and not anonymous: 4637 this = parser(self) 4638 else: 4639 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4640 4641 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4642 this = self.expression(subquery_predicate, this=self._parse_select()) 4643 self._match_r_paren() 4644 return this 4645 4646 if functions is None: 4647 functions = self.FUNCTIONS 4648 4649 function = functions.get(upper) 4650 4651 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4652 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4653 4654 if alias: 4655 args = self._kv_to_prop_eq(args) 4656 4657 if function and not anonymous: 4658 if "dialect" in function.__code__.co_varnames: 4659 func = function(args, dialect=self.dialect) 4660 else: 4661 func = function(args) 4662 4663 func = self.validate_expression(func, args) 4664 if not self.dialect.NORMALIZE_FUNCTIONS: 4665 func.meta["name"] = this 4666 4667 this = func 4668 else: 4669 if token_type == 
def _parse_user_defined_function(
    self, kind: t.Optional[TokenType] = None
) -> t.Optional[exp.Expression]:
    """Parse a possibly dot-qualified UDF name with an optional wrapped
    parameter list."""
    name = self._parse_id_var()

    # Fold dotted qualifiers (db.schema.func) into a chain of Dot nodes.
    while self._match(TokenType.DOT):
        name = self.expression(exp.Dot, this=name, expression=self._parse_id_var())

    # No parameter list: the (qualified) name is the whole result.
    if not self._match(TokenType.L_PAREN):
        return name

    params = self._parse_csv(self._parse_function_parameter)
    self._match_r_paren()

    return self.expression(
        exp.UserDefinedFunction, this=name, expressions=params, wrapped=True
    )
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a lambda (`(x, y) -> expr` or `x -> expr`), falling back to a
    DISTINCT list or a plain select/expression when no lambda arrow follows."""
    index = self._index

    if self._match(TokenType.L_PAREN):
        expressions = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )

        # Unbalanced paren: this was not an argument list - rewind so the
        # L_PAREN can be consumed by the fallback parse below.
        if not self._match(TokenType.R_PAREN):
            self._retreat(index)
    else:
        expressions = [self._parse_lambda_arg()]

    if self._match_set(self.LAMBDAS):
        # Dialect-specific lambda constructor keyed by the arrow token.
        return self.LAMBDAS[self._prev.token_type](self, expressions)

    # No lambda arrow: rewind and re-parse everything as a regular expression.
    self._retreat(index)

    this: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        this = self.expression(
            exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
        )
    else:
        this = self._parse_select_or_expression(alias=alias)

    # Function arguments may carry trailing modifiers (IGNORE NULLS, HAVING
    # MAX, ORDER BY, LIMIT) in some dialects.
    return self._parse_limit(
        self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
    )
def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse a column definition: optional type, computed/transform
    expression, and any number of trailing column constraints.

    Returns `this` unchanged when neither a type nor constraints are found.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        # Computed column, e.g. `col ALIAS expr` / `col MATERIALIZED expr`.
        persisted = self._prev.text.upper() == "MATERIALIZED"
        constraints.append(
            self.expression(
                exp.ComputedColumnConstraint,
                this=self._parse_conjunction(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
        )
    elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
        # `col type AS (expr)` transform-style computed column.
        self._match(TokenType.ALIAS)
        constraints.append(
            self.expression(exp.TransformColumnConstraint, this=self._parse_field())
        )

    # Collect any remaining constraints (NOT NULL, DEFAULT, CHECK, ...).
    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
def _parse_compress(self) -> exp.CompressColumnConstraint:
    """Parse a COMPRESS column constraint carrying either a single value or
    a parenthesized list of values."""
    has_paren = self._match(TokenType.L_PAREN, advance=False)
    payload = (
        self._parse_wrapped_csv(self._parse_bitwise) if has_paren else self._parse_bitwise()
    )
    return self.expression(exp.CompressColumnConstraint, this=payload)
def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
    """Parse the tail of a NOT-prefixed column constraint: NOT NULL,
    NOT CASESPECIFIC, or NOT FOR REPLICATION. Returns None on no match."""
    # Try each keyword tail in order; _match_text_seq only consumes on success.
    candidates = (
        (("NULL",), lambda: self.expression(exp.NotNullColumnConstraint)),
        (("CASESPECIFIC",), lambda: self.expression(exp.CaseSpecificColumnConstraint, not_=True)),
        (("FOR", "REPLICATION"), lambda: self.expression(exp.NotForReplicationColumnConstraint)),
    )

    for texts, build in candidates:
        if self._match_text_seq(*texts):
            return build()

    return None
def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """Parse a constraint that has no `CONSTRAINT <name>` prefix, dispatching
    on the matched keyword via CONSTRAINT_PARSERS."""
    # A quoted identifier can never introduce an unnamed constraint.
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None

    if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
        return None

    keyword = self._prev.text.upper()
    if keyword not in self.CONSTRAINT_PARSERS:
        # Matched one of the caller-supplied names that has no parser.
        self.raise_error(f"No parser found for schema constraint {keyword}.")

    return self.CONSTRAINT_PARSERS[keyword](self)
action = "CASCADE" 5000 elif self._match_text_seq("RESTRICT"): 5001 action = "RESTRICT" 5002 elif self._match_pair(TokenType.SET, TokenType.NULL): 5003 action = "SET NULL" 5004 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5005 action = "SET DEFAULT" 5006 else: 5007 self.raise_error("Invalid key constraint") 5008 5009 options.append(f"ON {on} {action}") 5010 elif self._match_text_seq("NOT", "ENFORCED"): 5011 options.append("NOT ENFORCED") 5012 elif self._match_text_seq("DEFERRABLE"): 5013 options.append("DEFERRABLE") 5014 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5015 options.append("INITIALLY DEFERRED") 5016 elif self._match_text_seq("NORELY"): 5017 options.append("NORELY") 5018 elif self._match_text_seq("MATCH", "FULL"): 5019 options.append("MATCH FULL") 5020 else: 5021 break 5022 5023 return options 5024 5025 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5026 if match and not self._match(TokenType.REFERENCES): 5027 return None 5028 5029 expressions = None 5030 this = self._parse_table(schema=True) 5031 options = self._parse_key_constraint_options() 5032 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5033 5034 def _parse_foreign_key(self) -> exp.ForeignKey: 5035 expressions = self._parse_wrapped_id_vars() 5036 reference = self._parse_references() 5037 options = {} 5038 5039 while self._match(TokenType.ON): 5040 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5041 self.raise_error("Expected DELETE or UPDATE") 5042 5043 kind = self._prev.text.lower() 5044 5045 if self._match_text_seq("NO", "ACTION"): 5046 action = "NO ACTION" 5047 elif self._match(TokenType.SET): 5048 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5049 action = "SET " + self._prev.text.upper() 5050 else: 5051 self._advance() 5052 action = self._prev.text.upper() 5053 5054 options[kind] = action 5055 5056 return self.expression( 5057 exp.ForeignKey, 5058 expressions=expressions, 5059 
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list; dialects may override this."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>); retreat if not present."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse a PRIMARY KEY constraint, column-level or table-level (with a column list)."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No parenthesized column list => column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside [...] or {...} (a possibly aliased, possibly sliced expression)."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} following `this`: array literal, struct literal, or subscript."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize the index by the dialect's base offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # `ELSE interval` can swallow END as an interval unit; undo that misparse.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(...) or statement-style IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as an opaque command in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT is already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(<expr> AS <type> [FORMAT <fmt>]); `strict` selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: CAST(<expr>, '<type string>').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Casting a string with FORMAT to a temporal type becomes STR_TO_DATE/TIME,
                # translating the format through the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name => user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT argument lists, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Charset variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be compared with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of (search, result) args leaves a trailing default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> <sep> [VALUE] <value> pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson if it is followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments (pairs, NULL handling, keys, RETURNING)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (<json column defs>) schema clause for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr>[, <path>] [error/empty handling] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH(<cols>) AGAINST(<expr> [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column definition inside the WITH (...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE arguments; `haystack_first` controls comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <model>, TABLE <table>[, <params struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table-list arguments, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM implies a start position of 1.
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH.
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>) puts the trim characters first; swap if needed.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `<name> AS (<window spec>)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing HAVING MAX/MIN <column> qualifier (MAX is the default)."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes on `this`: FILTER, WITHIN GROUP, IGNORE/RESPECT
        NULLS and OVER (...); with alias=True, parse a named WINDOW clause entry instead."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper to the outside of the aggregate.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments are re-attached to the Window node below.
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`.

        With explicit=True, an alias is only parsed if the AS keyword is present.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token (any token when any_token=True)."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and treat it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`), optionally upper-casing it."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (and not ignored)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR token or a string literal, whichever comes first."""
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to any token as a Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the name part of a parameter reference (e.g. after @ or $)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (e.g. ? or :name); rewinds if the sub-parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier such as EXCEPT(...) / REPLACE(...) introduced by `keywords`."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # (continuation of _parse_csv) attach pending comments to the item
            # preceding the separator, then parse the next item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: parse an operand, then keep
        combining with `parse_method` while a token in `expressions` matches."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list wrapped in parentheses (parens optional if `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside ( ... ); raise unless `optional` and no ( is present."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            # only consume the closing paren if we consumed an opening one
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or fall back to a (possibly aliased) expression
        followed by optional set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Each mode is a run of VAR tokens; modes are comma-separated
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION | WORK] [TO SAVEPOINT x] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK keyword itself has already been consumed (read from
        self._prev).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND CHAIN -> True, AND NO CHAIN -> False
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string | table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP [COLUMN] inside ALTER TABLE, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP [IF EXISTS] PARTITION ..."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of ALTER TABLE: constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the ALTER [COLUMN] action of ALTER TABLE (drop/set default,
        comment, or type change)."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Optional SET DATA prefix of "SET DATA TYPE <type>"
        self._match_text_seq("SET", "DATA")
        # (continuation of _parse_alter_table_alter) optional TYPE keyword of
        # "[SET DATA] TYPE <type> [COLLATE ...] [USING ...]"
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER DISTSTYLE {ALL | EVEN | AUTO | KEY DISTKEY <column>}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER [COMPOUND] SORTKEY {(<cols>) | AUTO | NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO <table>] inside ALTER TABLE."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of ALTER TABLE (properties, options, location,
        file format, tags, serde, etc. — dialect dependent)."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_conjunction()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_conjunction))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement; anything other than ALTER TABLE (or an
        unrecognized action) falls back to an opaque Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)
        # (continuation of _parse_alter) ALTER TABLE [IF EXISTS] [ONLY] <table> [ON ...]
        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if the whole input was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] <target> [AS alias] USING <source> ON <cond> WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ...
        clauses of a MERGE statement."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None otherwise
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW parsers, else fall back
        to an opaque Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <lhs> [= | TO] <rhs>."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item, dispatching to a dialect-specific parser if any."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; unparsed trailing tokens trigger a Command fallback."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-keyword) option from `options`, e.g.
        "ISOLATION LEVEL READ COMMITTED"; returns None (or raises) on no match."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; an empty-but-present continuation list
            # means the bare option is valid on its own
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap them in an opaque Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # `this` holds the leading keyword, `expression` the rest of the SQL text
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: (<kind> [(<key> <value>, ...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                # (continuation of _parse_dict_property) collect key/value sub-properties
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: (MIN <min> MAX <max>) or (<max>), where a
        missing MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail: <expr> IN <iterator> [IF <condition>]."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc ($$...$$ or $tag$...$tag$) into a Heredoc node."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        # Build the opening tag token sequence: $ [tag] $
        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr
        # (continuation of _parse_heredoc) scan for the closing tag sequence
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a multi-word statement parser by walking `trie` over the
        upcoming tokens; restores the cursor and returns None on no match."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against `token_type`; on success optionally
        advance, attach pending comments to `expression`, and return True."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a set of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens as a consecutive pair."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required "(", raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ")", raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's (upper-cased) text against a collection."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a consecutive sequence of keyword texts; restores the cursor on
        failure (and also on success when advance=False)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in a lambda body that refer to lambda parameters,
        unwrapping them (and casting when the parameter is typed)."""
        if not node:
            return node

        # Map each lambda parameter name to its cast target (or False for untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [DATABASE | TABLE] ... with optional identity,
        cascade/restrict and partition clauses; falls back to Command."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by WITH <operator>."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse an option list of the form [=] ( opt [,] opt ... )."""
        opts = []
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)
        while self._curr and not self._match(TokenType.R_PAREN):
            opts.append(self._parse_conjunction())
            self._match(TokenType.COMMA)
        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the parameter list of a COPY statement up to the closing ")"."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_unquoted_field()
            value = None

            # Some options are defined as functions with the values as params
            if not isinstance(option, exp.Func):
                # (continuation of _parse_copy_parameters) non-function option:
                # parse an optional delimiter and then the option's value
                prev = self._prev.text.upper()
                # Different dialects might separate options and values by white space, "=" and "AS"
                self._match(TokenType.EQ)
                self._match(TokenType.ALIAS)

                if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN):
                    # Snowflake FILE_FORMAT case
                    value = self._parse_wrapped_options()
                else:
                    value = self._parse_unquoted_field()

            param = self.expression(exp.CopyParameter, this=option, expression=value)
            options.append(param)

            if sep:
                self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse the credential-related clauses of a COPY statement
        (STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE, REGION)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", advance=False):
            expr.set("storage", self._parse_conjunction())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a file location argument of a COPY statement."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY [INTO] <target> FROM/TO <files> [credentials] [WITH (params)];
        falls back to an opaque Command on unparsed trailing tokens."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_conjunction()
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from a flat [k1, v1, k2, v2, ...] argument list.

    A single star argument produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []

    # Consume the arguments pairwise: even positions are keys, odd are values
    i = 0
    while i < len(args):
        keys.append(args[i])
        values.append(args[i + 1])
        i += 2

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG (or LN) node, honoring the dialect's argument order.

    The default argument order is (base, expression); dialects with
    LOG_BASE_FIRST unset expect the reverse.
    """
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if not expression:
        # Single-argument form: some dialects define LOG(x) as LN(x)
        klass = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return klass(this=this)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=this, expression=expression)

    return exp.Log(this=expression, expression=this)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for `expr_type` that converts the second argument into
    the dialect's JSON path representation."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Extra positional arguments are only supported by JSONExtract
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a MOD expression from two arguments.

    Binary operands are parenthesized so precedence survives rendering,
    e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """

    def _paren_if_binary(node):
        # Only binary nodes need wrapping; literals/columns are unambiguous
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(
        this=_paren_if_binary(seq_get(args, 0)),
        expression=_paren_if_binary(seq_get(args, 1)),
    )
116class Parser(metaclass=_Parser): 117 """ 118 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 119 120 Args: 121 error_level: The desired error level. 122 Default: ErrorLevel.IMMEDIATE 123 error_message_context: The amount of context to capture from a query string when displaying 124 the error message (in number of characters). 125 Default: 100 126 max_errors: Maximum number of error messages to include in a raised ParseError. 127 This is only relevant if error_level is ErrorLevel.RAISE. 128 Default: 3 129 """ 130 131 FUNCTIONS: t.Dict[str, t.Callable] = { 132 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 133 "CONCAT": lambda args, dialect: exp.Concat( 134 expressions=args, 135 safe=not dialect.STRICT_STRING_CONCAT, 136 coalesce=dialect.CONCAT_COALESCE, 137 ), 138 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 139 expressions=args, 140 safe=not dialect.STRICT_STRING_CONCAT, 141 coalesce=dialect.CONCAT_COALESCE, 142 ), 143 "DATE_TO_DATE_STR": lambda args: exp.Cast( 144 this=seq_get(args, 0), 145 to=exp.DataType(this=exp.DataType.Type.TEXT), 146 ), 147 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 148 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 149 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 150 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 151 "LIKE": build_like, 152 "LOG": build_logarithm, 153 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 154 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 155 "MOD": build_mod, 156 "TIME_TO_TIME_STR": lambda args: exp.Cast( 157 this=seq_get(args, 0), 158 to=exp.DataType(this=exp.DataType.Type.TEXT), 159 ), 160 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 161 this=exp.Cast( 162 this=seq_get(args, 0), 163 
to=exp.DataType(this=exp.DataType.Type.TEXT), 164 ), 165 start=exp.Literal.number(1), 166 length=exp.Literal.number(10), 167 ), 168 "VAR_MAP": build_var_map, 169 "LOWER": build_lower, 170 "UPPER": build_upper, 171 "HEX": build_hex, 172 "TO_HEX": build_hex, 173 } 174 175 NO_PAREN_FUNCTIONS = { 176 TokenType.CURRENT_DATE: exp.CurrentDate, 177 TokenType.CURRENT_DATETIME: exp.CurrentDate, 178 TokenType.CURRENT_TIME: exp.CurrentTime, 179 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 180 TokenType.CURRENT_USER: exp.CurrentUser, 181 } 182 183 STRUCT_TYPE_TOKENS = { 184 TokenType.NESTED, 185 TokenType.OBJECT, 186 TokenType.STRUCT, 187 } 188 189 NESTED_TYPE_TOKENS = { 190 TokenType.ARRAY, 191 TokenType.LOWCARDINALITY, 192 TokenType.MAP, 193 TokenType.NULLABLE, 194 *STRUCT_TYPE_TOKENS, 195 } 196 197 ENUM_TYPE_TOKENS = { 198 TokenType.ENUM, 199 TokenType.ENUM8, 200 TokenType.ENUM16, 201 } 202 203 AGGREGATE_TYPE_TOKENS = { 204 TokenType.AGGREGATEFUNCTION, 205 TokenType.SIMPLEAGGREGATEFUNCTION, 206 } 207 208 TYPE_TOKENS = { 209 TokenType.BIT, 210 TokenType.BOOLEAN, 211 TokenType.TINYINT, 212 TokenType.UTINYINT, 213 TokenType.SMALLINT, 214 TokenType.USMALLINT, 215 TokenType.INT, 216 TokenType.UINT, 217 TokenType.BIGINT, 218 TokenType.UBIGINT, 219 TokenType.INT128, 220 TokenType.UINT128, 221 TokenType.INT256, 222 TokenType.UINT256, 223 TokenType.MEDIUMINT, 224 TokenType.UMEDIUMINT, 225 TokenType.FIXEDSTRING, 226 TokenType.FLOAT, 227 TokenType.DOUBLE, 228 TokenType.CHAR, 229 TokenType.NCHAR, 230 TokenType.VARCHAR, 231 TokenType.NVARCHAR, 232 TokenType.BPCHAR, 233 TokenType.TEXT, 234 TokenType.MEDIUMTEXT, 235 TokenType.LONGTEXT, 236 TokenType.MEDIUMBLOB, 237 TokenType.LONGBLOB, 238 TokenType.BINARY, 239 TokenType.VARBINARY, 240 TokenType.JSON, 241 TokenType.JSONB, 242 TokenType.INTERVAL, 243 TokenType.TINYBLOB, 244 TokenType.TINYTEXT, 245 TokenType.TIME, 246 TokenType.TIMETZ, 247 TokenType.TIMESTAMP, 248 TokenType.TIMESTAMP_S, 249 TokenType.TIMESTAMP_MS, 250 
TokenType.TIMESTAMP_NS, 251 TokenType.TIMESTAMPTZ, 252 TokenType.TIMESTAMPLTZ, 253 TokenType.TIMESTAMPNTZ, 254 TokenType.DATETIME, 255 TokenType.DATETIME64, 256 TokenType.DATE, 257 TokenType.DATE32, 258 TokenType.INT4RANGE, 259 TokenType.INT4MULTIRANGE, 260 TokenType.INT8RANGE, 261 TokenType.INT8MULTIRANGE, 262 TokenType.NUMRANGE, 263 TokenType.NUMMULTIRANGE, 264 TokenType.TSRANGE, 265 TokenType.TSMULTIRANGE, 266 TokenType.TSTZRANGE, 267 TokenType.TSTZMULTIRANGE, 268 TokenType.DATERANGE, 269 TokenType.DATEMULTIRANGE, 270 TokenType.DECIMAL, 271 TokenType.UDECIMAL, 272 TokenType.BIGDECIMAL, 273 TokenType.UUID, 274 TokenType.GEOGRAPHY, 275 TokenType.GEOMETRY, 276 TokenType.HLLSKETCH, 277 TokenType.HSTORE, 278 TokenType.PSEUDO_TYPE, 279 TokenType.SUPER, 280 TokenType.SERIAL, 281 TokenType.SMALLSERIAL, 282 TokenType.BIGSERIAL, 283 TokenType.XML, 284 TokenType.YEAR, 285 TokenType.UNIQUEIDENTIFIER, 286 TokenType.USERDEFINED, 287 TokenType.MONEY, 288 TokenType.SMALLMONEY, 289 TokenType.ROWVERSION, 290 TokenType.IMAGE, 291 TokenType.VARIANT, 292 TokenType.OBJECT, 293 TokenType.OBJECT_IDENTIFIER, 294 TokenType.INET, 295 TokenType.IPADDRESS, 296 TokenType.IPPREFIX, 297 TokenType.IPV4, 298 TokenType.IPV6, 299 TokenType.UNKNOWN, 300 TokenType.NULL, 301 TokenType.NAME, 302 TokenType.TDIGEST, 303 *ENUM_TYPE_TOKENS, 304 *NESTED_TYPE_TOKENS, 305 *AGGREGATE_TYPE_TOKENS, 306 } 307 308 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 309 TokenType.BIGINT: TokenType.UBIGINT, 310 TokenType.INT: TokenType.UINT, 311 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 312 TokenType.SMALLINT: TokenType.USMALLINT, 313 TokenType.TINYINT: TokenType.UTINYINT, 314 TokenType.DECIMAL: TokenType.UDECIMAL, 315 } 316 317 SUBQUERY_PREDICATES = { 318 TokenType.ANY: exp.Any, 319 TokenType.ALL: exp.All, 320 TokenType.EXISTS: exp.Exists, 321 TokenType.SOME: exp.Any, 322 } 323 324 RESERVED_TOKENS = { 325 *Tokenizer.SINGLE_TOKENS.values(), 326 TokenType.SELECT, 327 } - {TokenType.IDENTIFIER} 328 329 DB_CREATABLES = { 330 
TokenType.DATABASE, 331 TokenType.DICTIONARY, 332 TokenType.MODEL, 333 TokenType.SCHEMA, 334 TokenType.SEQUENCE, 335 TokenType.STORAGE_INTEGRATION, 336 TokenType.TABLE, 337 TokenType.TAG, 338 TokenType.VIEW, 339 } 340 341 CREATABLES = { 342 TokenType.COLUMN, 343 TokenType.CONSTRAINT, 344 TokenType.FOREIGN_KEY, 345 TokenType.FUNCTION, 346 TokenType.INDEX, 347 TokenType.PROCEDURE, 348 *DB_CREATABLES, 349 } 350 351 # Tokens that can represent identifiers 352 ID_VAR_TOKENS = { 353 TokenType.VAR, 354 TokenType.ANTI, 355 TokenType.APPLY, 356 TokenType.ASC, 357 TokenType.ASOF, 358 TokenType.AUTO_INCREMENT, 359 TokenType.BEGIN, 360 TokenType.BPCHAR, 361 TokenType.CACHE, 362 TokenType.CASE, 363 TokenType.COLLATE, 364 TokenType.COMMAND, 365 TokenType.COMMENT, 366 TokenType.COMMIT, 367 TokenType.CONSTRAINT, 368 TokenType.COPY, 369 TokenType.DEFAULT, 370 TokenType.DELETE, 371 TokenType.DESC, 372 TokenType.DESCRIBE, 373 TokenType.DICTIONARY, 374 TokenType.DIV, 375 TokenType.END, 376 TokenType.EXECUTE, 377 TokenType.ESCAPE, 378 TokenType.FALSE, 379 TokenType.FIRST, 380 TokenType.FILTER, 381 TokenType.FINAL, 382 TokenType.FORMAT, 383 TokenType.FULL, 384 TokenType.IDENTIFIER, 385 TokenType.IS, 386 TokenType.ISNULL, 387 TokenType.INTERVAL, 388 TokenType.KEEP, 389 TokenType.KILL, 390 TokenType.LEFT, 391 TokenType.LOAD, 392 TokenType.MERGE, 393 TokenType.NATURAL, 394 TokenType.NEXT, 395 TokenType.OFFSET, 396 TokenType.OPERATOR, 397 TokenType.ORDINALITY, 398 TokenType.OVERLAPS, 399 TokenType.OVERWRITE, 400 TokenType.PARTITION, 401 TokenType.PERCENT, 402 TokenType.PIVOT, 403 TokenType.PRAGMA, 404 TokenType.RANGE, 405 TokenType.RECURSIVE, 406 TokenType.REFERENCES, 407 TokenType.REFRESH, 408 TokenType.REPLACE, 409 TokenType.RIGHT, 410 TokenType.ROLLUP, 411 TokenType.ROW, 412 TokenType.ROWS, 413 TokenType.SEMI, 414 TokenType.SET, 415 TokenType.SETTINGS, 416 TokenType.SHOW, 417 TokenType.TEMPORARY, 418 TokenType.TOP, 419 TokenType.TRUE, 420 TokenType.TRUNCATE, 421 TokenType.UNIQUE, 422 
TokenType.UNPIVOT, 423 TokenType.UPDATE, 424 TokenType.USE, 425 TokenType.VOLATILE, 426 TokenType.WINDOW, 427 *CREATABLES, 428 *SUBQUERY_PREDICATES, 429 *TYPE_TOKENS, 430 *NO_PAREN_FUNCTIONS, 431 } 432 433 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 434 435 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 436 TokenType.ANTI, 437 TokenType.APPLY, 438 TokenType.ASOF, 439 TokenType.FULL, 440 TokenType.LEFT, 441 TokenType.LOCK, 442 TokenType.NATURAL, 443 TokenType.OFFSET, 444 TokenType.RIGHT, 445 TokenType.SEMI, 446 TokenType.WINDOW, 447 } 448 449 ALIAS_TOKENS = ID_VAR_TOKENS 450 451 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 452 453 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 454 455 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 456 457 FUNC_TOKENS = { 458 TokenType.COLLATE, 459 TokenType.COMMAND, 460 TokenType.CURRENT_DATE, 461 TokenType.CURRENT_DATETIME, 462 TokenType.CURRENT_TIMESTAMP, 463 TokenType.CURRENT_TIME, 464 TokenType.CURRENT_USER, 465 TokenType.FILTER, 466 TokenType.FIRST, 467 TokenType.FORMAT, 468 TokenType.GLOB, 469 TokenType.IDENTIFIER, 470 TokenType.INDEX, 471 TokenType.ISNULL, 472 TokenType.ILIKE, 473 TokenType.INSERT, 474 TokenType.LIKE, 475 TokenType.MERGE, 476 TokenType.OFFSET, 477 TokenType.PRIMARY_KEY, 478 TokenType.RANGE, 479 TokenType.REPLACE, 480 TokenType.RLIKE, 481 TokenType.ROW, 482 TokenType.UNNEST, 483 TokenType.VAR, 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.SEQUENCE, 487 TokenType.DATE, 488 TokenType.DATETIME, 489 TokenType.TABLE, 490 TokenType.TIMESTAMP, 491 TokenType.TIMESTAMPTZ, 492 TokenType.TRUNCATE, 493 TokenType.WINDOW, 494 TokenType.XOR, 495 *TYPE_TOKENS, 496 *SUBQUERY_PREDICATES, 497 } 498 499 CONJUNCTION = { 500 TokenType.AND: exp.And, 501 TokenType.OR: exp.Or, 502 } 503 504 EQUALITY = { 505 TokenType.EQ: exp.EQ, 506 TokenType.NEQ: exp.NEQ, 507 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 508 } 509 510 COMPARISON = { 511 TokenType.GT: exp.GT, 512 TokenType.GTE: exp.GTE, 513 TokenType.LT: 
exp.LT, 514 TokenType.LTE: exp.LTE, 515 } 516 517 BITWISE = { 518 TokenType.AMP: exp.BitwiseAnd, 519 TokenType.CARET: exp.BitwiseXor, 520 TokenType.PIPE: exp.BitwiseOr, 521 } 522 523 TERM = { 524 TokenType.DASH: exp.Sub, 525 TokenType.PLUS: exp.Add, 526 TokenType.MOD: exp.Mod, 527 TokenType.COLLATE: exp.Collate, 528 } 529 530 FACTOR = { 531 TokenType.DIV: exp.IntDiv, 532 TokenType.LR_ARROW: exp.Distance, 533 TokenType.SLASH: exp.Div, 534 TokenType.STAR: exp.Mul, 535 } 536 537 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 538 539 TIMES = { 540 TokenType.TIME, 541 TokenType.TIMETZ, 542 } 543 544 TIMESTAMPS = { 545 TokenType.TIMESTAMP, 546 TokenType.TIMESTAMPTZ, 547 TokenType.TIMESTAMPLTZ, 548 *TIMES, 549 } 550 551 SET_OPERATIONS = { 552 TokenType.UNION, 553 TokenType.INTERSECT, 554 TokenType.EXCEPT, 555 } 556 557 JOIN_METHODS = { 558 TokenType.ASOF, 559 TokenType.NATURAL, 560 TokenType.POSITIONAL, 561 } 562 563 JOIN_SIDES = { 564 TokenType.LEFT, 565 TokenType.RIGHT, 566 TokenType.FULL, 567 } 568 569 JOIN_KINDS = { 570 TokenType.INNER, 571 TokenType.OUTER, 572 TokenType.CROSS, 573 TokenType.SEMI, 574 TokenType.ANTI, 575 } 576 577 JOIN_HINTS: t.Set[str] = set() 578 579 LAMBDAS = { 580 TokenType.ARROW: lambda self, expressions: self.expression( 581 exp.Lambda, 582 this=self._replace_lambda( 583 self._parse_conjunction(), 584 expressions, 585 ), 586 expressions=expressions, 587 ), 588 TokenType.FARROW: lambda self, expressions: self.expression( 589 exp.Kwarg, 590 this=exp.var(expressions[0].name), 591 expression=self._parse_conjunction(), 592 ), 593 } 594 595 COLUMN_OPERATORS = { 596 TokenType.DOT: None, 597 TokenType.DCOLON: lambda self, this, to: self.expression( 598 exp.Cast if self.STRICT_CAST else exp.TryCast, 599 this=this, 600 to=to, 601 ), 602 TokenType.ARROW: lambda self, this, path: self.expression( 603 exp.JSONExtract, 604 this=this, 605 expression=self.dialect.to_json_path(path), 606 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 607 ), 608 
TokenType.DARROW: lambda self, this, path: self.expression( 609 exp.JSONExtractScalar, 610 this=this, 611 expression=self.dialect.to_json_path(path), 612 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 613 ), 614 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 615 exp.JSONBExtract, 616 this=this, 617 expression=path, 618 ), 619 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 620 exp.JSONBExtractScalar, 621 this=this, 622 expression=path, 623 ), 624 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 625 exp.JSONBContains, 626 this=this, 627 expression=key, 628 ), 629 } 630 631 EXPRESSION_PARSERS = { 632 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 633 exp.Column: lambda self: self._parse_column(), 634 exp.Condition: lambda self: self._parse_conjunction(), 635 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 636 exp.Expression: lambda self: self._parse_expression(), 637 exp.From: lambda self: self._parse_from(joins=True), 638 exp.Group: lambda self: self._parse_group(), 639 exp.Having: lambda self: self._parse_having(), 640 exp.Identifier: lambda self: self._parse_id_var(), 641 exp.Join: lambda self: self._parse_join(), 642 exp.Lambda: lambda self: self._parse_lambda(), 643 exp.Lateral: lambda self: self._parse_lateral(), 644 exp.Limit: lambda self: self._parse_limit(), 645 exp.Offset: lambda self: self._parse_offset(), 646 exp.Order: lambda self: self._parse_order(), 647 exp.Ordered: lambda self: self._parse_ordered(), 648 exp.Properties: lambda self: self._parse_properties(), 649 exp.Qualify: lambda self: self._parse_qualify(), 650 exp.Returning: lambda self: self._parse_returning(), 651 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 652 exp.Table: lambda self: self._parse_table_parts(), 653 exp.TableAlias: lambda self: self._parse_table_alias(), 654 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 655 exp.Where: lambda 
self: self._parse_where(), 656 exp.Window: lambda self: self._parse_named_window(), 657 exp.With: lambda self: self._parse_with(), 658 "JOIN_TYPE": lambda self: self._parse_join_parts(), 659 } 660 661 STATEMENT_PARSERS = { 662 TokenType.ALTER: lambda self: self._parse_alter(), 663 TokenType.BEGIN: lambda self: self._parse_transaction(), 664 TokenType.CACHE: lambda self: self._parse_cache(), 665 TokenType.COMMENT: lambda self: self._parse_comment(), 666 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 667 TokenType.COPY: lambda self: self._parse_copy(), 668 TokenType.CREATE: lambda self: self._parse_create(), 669 TokenType.DELETE: lambda self: self._parse_delete(), 670 TokenType.DESC: lambda self: self._parse_describe(), 671 TokenType.DESCRIBE: lambda self: self._parse_describe(), 672 TokenType.DROP: lambda self: self._parse_drop(), 673 TokenType.INSERT: lambda self: self._parse_insert(), 674 TokenType.KILL: lambda self: self._parse_kill(), 675 TokenType.LOAD: lambda self: self._parse_load(), 676 TokenType.MERGE: lambda self: self._parse_merge(), 677 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 678 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 679 TokenType.REFRESH: lambda self: self._parse_refresh(), 680 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 681 TokenType.SET: lambda self: self._parse_set(), 682 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 683 TokenType.UNCACHE: lambda self: self._parse_uncache(), 684 TokenType.UPDATE: lambda self: self._parse_update(), 685 TokenType.USE: lambda self: self.expression( 686 exp.Use, 687 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 688 this=self._parse_table(schema=False), 689 ), 690 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 691 } 692 693 UNARY_PARSERS = { 694 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 695 TokenType.NOT: 
lambda self: self.expression(exp.Not, this=self._parse_equality()), 696 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 697 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 698 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 699 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 700 } 701 702 STRING_PARSERS = { 703 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 704 exp.RawString, this=token.text 705 ), 706 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 707 exp.National, this=token.text 708 ), 709 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 710 TokenType.STRING: lambda self, token: self.expression( 711 exp.Literal, this=token.text, is_string=True 712 ), 713 TokenType.UNICODE_STRING: lambda self, token: self.expression( 714 exp.UnicodeString, 715 this=token.text, 716 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 717 ), 718 } 719 720 NUMERIC_PARSERS = { 721 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 722 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 723 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 724 TokenType.NUMBER: lambda self, token: self.expression( 725 exp.Literal, this=token.text, is_string=False 726 ), 727 } 728 729 PRIMARY_PARSERS = { 730 **STRING_PARSERS, 731 **NUMERIC_PARSERS, 732 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 733 TokenType.NULL: lambda self, _: self.expression(exp.Null), 734 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 735 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 736 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 737 TokenType.STAR: lambda 
self, _: self.expression( 738 exp.Star, 739 **{ 740 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 741 "replace": self._parse_star_op("REPLACE"), 742 "rename": self._parse_star_op("RENAME"), 743 }, 744 ), 745 } 746 747 PLACEHOLDER_PARSERS = { 748 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 749 TokenType.PARAMETER: lambda self: self._parse_parameter(), 750 TokenType.COLON: lambda self: ( 751 self.expression(exp.Placeholder, this=self._prev.text) 752 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 753 else None 754 ), 755 } 756 757 RANGE_PARSERS = { 758 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 759 TokenType.GLOB: binary_range_parser(exp.Glob), 760 TokenType.ILIKE: binary_range_parser(exp.ILike), 761 TokenType.IN: lambda self, this: self._parse_in(this), 762 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 763 TokenType.IS: lambda self, this: self._parse_is(this), 764 TokenType.LIKE: binary_range_parser(exp.Like), 765 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 766 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 767 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 768 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 769 } 770 771 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 772 "ALLOWED_VALUES": lambda self: self.expression( 773 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 774 ), 775 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 776 "AUTO": lambda self: self._parse_auto_property(), 777 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 778 "BACKUP": lambda self: self.expression( 779 exp.BackupProperty, this=self._parse_var(any_token=True) 780 ), 781 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 782 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 783 "CHARACTER SET": lambda self, **kwargs: 
self._parse_character_set(**kwargs), 784 "CHECKSUM": lambda self: self._parse_checksum(), 785 "CLUSTER BY": lambda self: self._parse_cluster(), 786 "CLUSTERED": lambda self: self._parse_clustered_by(), 787 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 788 exp.CollateProperty, **kwargs 789 ), 790 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 791 "CONTAINS": lambda self: self._parse_contains_property(), 792 "COPY": lambda self: self._parse_copy_property(), 793 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 794 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 795 "DEFINER": lambda self: self._parse_definer(), 796 "DETERMINISTIC": lambda self: self.expression( 797 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 798 ), 799 "DISTKEY": lambda self: self._parse_distkey(), 800 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 801 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 802 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 803 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 804 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 805 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 806 "FREESPACE": lambda self: self._parse_freespace(), 807 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 808 "HEAP": lambda self: self.expression(exp.HeapProperty), 809 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 810 "IMMUTABLE": lambda self: self.expression( 811 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 812 ), 813 "INHERITS": lambda self: self.expression( 814 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 815 ), 816 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 817 "JOURNAL": lambda self, **kwargs: 
self._parse_journal(**kwargs), 818 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 819 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 820 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 821 "LIKE": lambda self: self._parse_create_like(), 822 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 823 "LOCK": lambda self: self._parse_locking(), 824 "LOCKING": lambda self: self._parse_locking(), 825 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 826 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 827 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 828 "MODIFIES": lambda self: self._parse_modifies_property(), 829 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 830 "NO": lambda self: self._parse_no_property(), 831 "ON": lambda self: self._parse_on_property(), 832 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 833 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 834 "PARTITION": lambda self: self._parse_partitioned_of(), 835 "PARTITION BY": lambda self: self._parse_partitioned_by(), 836 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 837 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 838 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 839 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 840 "READS": lambda self: self._parse_reads_property(), 841 "REMOTE": lambda self: self._parse_remote_with_connection(), 842 "RETURNS": lambda self: self._parse_returns(), 843 "STRICT": lambda self: self.expression(exp.StrictProperty), 844 "ROW": lambda self: self._parse_row(), 845 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 846 "SAMPLE": lambda self: self.expression( 847 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 848 
), 849 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 850 "SETTINGS": lambda self: self.expression( 851 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 852 ), 853 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 854 "SORTKEY": lambda self: self._parse_sortkey(), 855 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 856 "STABLE": lambda self: self.expression( 857 exp.StabilityProperty, this=exp.Literal.string("STABLE") 858 ), 859 "STORED": lambda self: self._parse_stored(), 860 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 861 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 862 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 863 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 864 "TO": lambda self: self._parse_to_table(), 865 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 866 "TRANSFORM": lambda self: self.expression( 867 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 868 ), 869 "TTL": lambda self: self._parse_ttl(), 870 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 871 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 872 "VOLATILE": lambda self: self._parse_volatile_property(), 873 "WITH": lambda self: self._parse_with_property(), 874 } 875 876 CONSTRAINT_PARSERS = { 877 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 878 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 879 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 880 "CHARACTER SET": lambda self: self.expression( 881 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 882 ), 883 "CHECK": lambda self: self.expression( 884 exp.CheckColumnConstraint, 885 this=self._parse_wrapped(self._parse_conjunction), 886 enforced=self._match_text_seq("ENFORCED"), 887 ), 888 
"COLLATE": lambda self: self.expression( 889 exp.CollateColumnConstraint, this=self._parse_var() 890 ), 891 "COMMENT": lambda self: self.expression( 892 exp.CommentColumnConstraint, this=self._parse_string() 893 ), 894 "COMPRESS": lambda self: self._parse_compress(), 895 "CLUSTERED": lambda self: self.expression( 896 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 897 ), 898 "NONCLUSTERED": lambda self: self.expression( 899 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 900 ), 901 "DEFAULT": lambda self: self.expression( 902 exp.DefaultColumnConstraint, this=self._parse_bitwise() 903 ), 904 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 905 "EPHEMERAL": lambda self: self.expression( 906 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 907 ), 908 "EXCLUDE": lambda self: self.expression( 909 exp.ExcludeColumnConstraint, this=self._parse_index_params() 910 ), 911 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 912 "FORMAT": lambda self: self.expression( 913 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 914 ), 915 "GENERATED": lambda self: self._parse_generated_as_identity(), 916 "IDENTITY": lambda self: self._parse_auto_increment(), 917 "INLINE": lambda self: self._parse_inline(), 918 "LIKE": lambda self: self._parse_create_like(), 919 "NOT": lambda self: self._parse_not_constraint(), 920 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 921 "ON": lambda self: ( 922 self._match(TokenType.UPDATE) 923 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 924 ) 925 or self.expression(exp.OnProperty, this=self._parse_id_var()), 926 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 927 "PERIOD": lambda self: self._parse_period_for_system_time(), 928 "PRIMARY KEY": lambda self: self._parse_primary_key(), 929 "REFERENCES": lambda 
self: self._parse_references(match=False), 930 "TITLE": lambda self: self.expression( 931 exp.TitleColumnConstraint, this=self._parse_var_or_string() 932 ), 933 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 934 "UNIQUE": lambda self: self._parse_unique(), 935 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 936 "WITH": lambda self: self.expression( 937 exp.Properties, expressions=self._parse_wrapped_properties() 938 ), 939 } 940 941 ALTER_PARSERS = { 942 "ADD": lambda self: self._parse_alter_table_add(), 943 "ALTER": lambda self: self._parse_alter_table_alter(), 944 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 945 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 946 "DROP": lambda self: self._parse_alter_table_drop(), 947 "RENAME": lambda self: self._parse_alter_table_rename(), 948 "SET": lambda self: self._parse_alter_table_set(), 949 } 950 951 ALTER_ALTER_PARSERS = { 952 "DISTKEY": lambda self: self._parse_alter_diststyle(), 953 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 954 "SORTKEY": lambda self: self._parse_alter_sortkey(), 955 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 956 } 957 958 SCHEMA_UNNAMED_CONSTRAINTS = { 959 "CHECK", 960 "EXCLUDE", 961 "FOREIGN KEY", 962 "LIKE", 963 "PERIOD", 964 "PRIMARY KEY", 965 "UNIQUE", 966 } 967 968 NO_PAREN_FUNCTION_PARSERS = { 969 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 970 "CASE": lambda self: self._parse_case(), 971 "IF": lambda self: self._parse_if(), 972 "NEXT": lambda self: self._parse_next_value_for(), 973 } 974 975 INVALID_FUNC_NAME_TOKENS = { 976 TokenType.IDENTIFIER, 977 TokenType.STRING, 978 } 979 980 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 981 982 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 983 984 FUNCTION_PARSERS = { 985 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 986 "CONVERT": lambda 
self: self._parse_convert(self.STRICT_CAST), 987 "DECODE": lambda self: self._parse_decode(), 988 "EXTRACT": lambda self: self._parse_extract(), 989 "JSON_OBJECT": lambda self: self._parse_json_object(), 990 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 991 "JSON_TABLE": lambda self: self._parse_json_table(), 992 "MATCH": lambda self: self._parse_match_against(), 993 "OPENJSON": lambda self: self._parse_open_json(), 994 "POSITION": lambda self: self._parse_position(), 995 "PREDICT": lambda self: self._parse_predict(), 996 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 997 "STRING_AGG": lambda self: self._parse_string_agg(), 998 "SUBSTRING": lambda self: self._parse_substring(), 999 "TRIM": lambda self: self._parse_trim(), 1000 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1001 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1002 } 1003 1004 QUERY_MODIFIER_PARSERS = { 1005 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1006 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1007 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1008 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1009 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1010 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1011 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1012 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1013 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1014 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1015 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1016 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1017 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1018 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1019 TokenType.USING: 
lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1020 TokenType.CLUSTER_BY: lambda self: ( 1021 "cluster", 1022 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1023 ), 1024 TokenType.DISTRIBUTE_BY: lambda self: ( 1025 "distribute", 1026 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1027 ), 1028 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1029 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1030 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1031 } 1032 1033 SET_PARSERS = { 1034 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1035 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1036 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1037 "TRANSACTION": lambda self: self._parse_set_transaction(), 1038 } 1039 1040 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1041 1042 TYPE_LITERAL_PARSERS = { 1043 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1044 } 1045 1046 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1047 1048 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1049 1050 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1051 1052 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1053 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1054 "ISOLATION": ( 1055 ("LEVEL", "REPEATABLE", "READ"), 1056 ("LEVEL", "READ", "COMMITTED"), 1057 ("LEVEL", "READ", "UNCOMITTED"), 1058 ("LEVEL", "SERIALIZABLE"), 1059 ), 1060 "READ": ("WRITE", "ONLY"), 1061 } 1062 1063 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1064 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1065 ) 1066 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1067 1068 CREATE_SEQUENCE: OPTIONS_TYPE = { 1069 "SCALE": ("EXTEND", "NOEXTEND"), 1070 "SHARD": 
("EXTEND", "NOEXTEND"), 1071 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1072 **dict.fromkeys( 1073 ( 1074 "SESSION", 1075 "GLOBAL", 1076 "KEEP", 1077 "NOKEEP", 1078 "ORDER", 1079 "NOORDER", 1080 "NOCACHE", 1081 "CYCLE", 1082 "NOCYCLE", 1083 "NOMINVALUE", 1084 "NOMAXVALUE", 1085 "NOSCALE", 1086 "NOSHARD", 1087 ), 1088 tuple(), 1089 ), 1090 } 1091 1092 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1093 1094 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1095 1096 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1097 1098 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1099 1100 CLONE_KEYWORDS = {"CLONE", "COPY"} 1101 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1102 1103 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1104 1105 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1106 1107 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1108 1109 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1110 1111 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1112 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1113 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1114 1115 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1116 1117 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1118 1119 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1120 1121 DISTINCT_TOKENS = {TokenType.DISTINCT} 1122 1123 NULL_TOKENS = {TokenType.NULL} 1124 1125 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1126 1127 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1128 1129 STRICT_CAST = True 1130 1131 PREFIXED_PIVOT_COLUMNS = False 1132 IDENTIFY_PIVOT_STRINGS = False 1133 1134 LOG_DEFAULTS_TO_LN = False 1135 1136 # Whether ADD is 
present for each column added by ALTER TABLE 1137 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1138 1139 # Whether the table sample clause expects CSV syntax 1140 TABLESAMPLE_CSV = False 1141 1142 # The default method used for table sampling 1143 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1144 1145 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1146 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1147 1148 # Whether the TRIM function expects the characters to trim as its first argument 1149 TRIM_PATTERN_FIRST = False 1150 1151 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1152 STRING_ALIASES = False 1153 1154 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1155 MODIFIERS_ATTACHED_TO_UNION = True 1156 UNION_MODIFIERS = {"order", "limit", "offset"} 1157 1158 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1159 NO_PAREN_IF_COMMANDS = True 1160 1161 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1162 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1163 1164 # Whether the `:` operator is used to extract a value from a JSON document 1165 COLON_IS_JSON_EXTRACT = False 1166 1167 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1168 # If this is True and '(' is not found, the keyword will be treated as an identifier 1169 VALUES_FOLLOWED_BY_PAREN = True 1170 1171 # Whether implicit unnesting is supported, e.g. 
# e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated into a ParseError.
            dialect: The dialect (name, instance or class) whose rules this parser uses.
        """
        # Imported locally — presumably to avoid a circular import at module
        # load time (sqlglot.dialects imports from this module).
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self) -> None:
        """Clears all per-parse state so this instance can be reused for another parse."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the candidate expression types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type that was being attempted, so the
                # final merged error explains every candidate that was tried.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    # A semicolon that carries comments becomes its own chunk so the
                    # comments are preserved (STATEMENT_PARSERS maps it to exp.Semicolon).
                    chunks.append([token])

                if i < total - 1:
                    # Open a fresh chunk unless this is the trailing semicolon.
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from the sentinel index -1 onto the first token.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the nearest known token so the error can still be located.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending SQL segment in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1364 kwargs: The arguments to set for the expression along with their respective values. 1365 1366 Returns: 1367 The target expression. 1368 """ 1369 instance = exp_class(**kwargs) 1370 instance.add_comments(comments) if comments else self._add_comments(instance) 1371 return self.validate_expression(instance) 1372 1373 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1374 if expression and self._prev_comments: 1375 expression.add_comments(self._prev_comments) 1376 self._prev_comments = None 1377 1378 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1379 """ 1380 Validates an Expression, making sure that all its mandatory arguments are set. 1381 1382 Args: 1383 expression: The expression to validate. 1384 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1385 1386 Returns: 1387 The validated expression. 1388 """ 1389 if self.error_level != ErrorLevel.IGNORE: 1390 for error_message in expression.error_messages(args): 1391 self.raise_error(error_message) 1392 1393 return expression 1394 1395 def _find_sql(self, start: Token, end: Token) -> str: 1396 return self.sql[start.start : end.end + 1] 1397 1398 def _is_connected(self) -> bool: 1399 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1400 1401 def _advance(self, times: int = 1) -> None: 1402 self._index += times 1403 self._curr = seq_get(self._tokens, self._index) 1404 self._next = seq_get(self._tokens, self._index + 1) 1405 1406 if self._index > 0: 1407 self._prev = self._tokens[self._index - 1] 1408 self._prev_comments = self._prev.comments 1409 else: 1410 self._prev = None 1411 self._prev_comments = None 1412 1413 def _retreat(self, index: int) -> None: 1414 if index != self._index: 1415 self._advance(index - self._index) 1416 1417 def _warn_unsupported(self) -> None: 1418 if len(self._tokens) <= 1: 1419 return 1420 1421 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1422 # interested in emitting a warning for the one being currently processed. 1423 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1424 1425 logger.warning( 1426 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1427 ) 1428 1429 def _parse_command(self) -> exp.Command: 1430 self._warn_unsupported() 1431 return self.expression( 1432 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1433 ) 1434 1435 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1436 """ 1437 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1438 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1439 the parser state accordingly 1440 """ 1441 index = self._index 1442 error_level = self.error_level 1443 1444 self.error_level = ErrorLevel.IMMEDIATE 1445 try: 1446 this = parse_method() 1447 except ParseError: 1448 this = None 1449 finally: 1450 if not this or retreat: 1451 self._retreat(index) 1452 self.error_level = error_level 1453 1454 return this 1455 1456 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1457 start = self._prev 1458 exists = self._parse_exists() if allow_exists else None 1459 1460 self._match(TokenType.ON) 1461 1462 materialized = self._match_text_seq("MATERIALIZED") 1463 kind = self._match_set(self.CREATABLES) and self._prev 1464 if not kind: 1465 return self._parse_as_command(start) 1466 1467 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1468 this = self._parse_user_defined_function(kind=kind.token_type) 1469 elif kind.token_type == TokenType.TABLE: 1470 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1471 elif kind.token_type == TokenType.COLUMN: 1472 this = self._parse_column() 1473 else: 1474 this = self._parse_id_var() 1475 1476 
self._match(TokenType.IS) 1477 1478 return self.expression( 1479 exp.Comment, 1480 this=this, 1481 kind=kind.text, 1482 expression=self._parse_string(), 1483 exists=exists, 1484 materialized=materialized, 1485 ) 1486 1487 def _parse_to_table( 1488 self, 1489 ) -> exp.ToTableProperty: 1490 table = self._parse_table_parts(schema=True) 1491 return self.expression(exp.ToTableProperty, this=table) 1492 1493 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1494 def _parse_ttl(self) -> exp.Expression: 1495 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1496 this = self._parse_bitwise() 1497 1498 if self._match_text_seq("DELETE"): 1499 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1500 if self._match_text_seq("RECOMPRESS"): 1501 return self.expression( 1502 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1503 ) 1504 if self._match_text_seq("TO", "DISK"): 1505 return self.expression( 1506 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1507 ) 1508 if self._match_text_seq("TO", "VOLUME"): 1509 return self.expression( 1510 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1511 ) 1512 1513 return this 1514 1515 expressions = self._parse_csv(_parse_ttl_action) 1516 where = self._parse_where() 1517 group = self._parse_group() 1518 1519 aggregates = None 1520 if group and self._match(TokenType.SET): 1521 aggregates = self._parse_csv(self._parse_set_item) 1522 1523 return self.expression( 1524 exp.MergeTreeTTL, 1525 expressions=expressions, 1526 where=where, 1527 group=group, 1528 aggregates=aggregates, 1529 ) 1530 1531 def _parse_statement(self) -> t.Optional[exp.Expression]: 1532 if self._curr is None: 1533 return None 1534 1535 if self._match_set(self.STATEMENT_PARSERS): 1536 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1537 1538 if self._match_set(self.dialect.tokenizer.COMMANDS): 1539 return self._parse_command() 1540 1541 
expression = self._parse_expression() 1542 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1543 return self._parse_query_modifiers(expression) 1544 1545 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1546 start = self._prev 1547 temporary = self._match(TokenType.TEMPORARY) 1548 materialized = self._match_text_seq("MATERIALIZED") 1549 1550 kind = self._match_set(self.CREATABLES) and self._prev.text 1551 if not kind: 1552 return self._parse_as_command(start) 1553 1554 if_exists = exists or self._parse_exists() 1555 table = self._parse_table_parts( 1556 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1557 ) 1558 1559 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1560 1561 if self._match(TokenType.L_PAREN, advance=False): 1562 expressions = self._parse_wrapped_csv(self._parse_types) 1563 else: 1564 expressions = None 1565 1566 return self.expression( 1567 exp.Drop, 1568 comments=start.comments, 1569 exists=if_exists, 1570 this=table, 1571 expressions=expressions, 1572 kind=kind.upper(), 1573 temporary=temporary, 1574 materialized=materialized, 1575 cascade=self._match_text_seq("CASCADE"), 1576 constraints=self._match_text_seq("CONSTRAINTS"), 1577 purge=self._match_text_seq("PURGE"), 1578 cluster=cluster, 1579 ) 1580 1581 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1582 return ( 1583 self._match_text_seq("IF") 1584 and (not not_ or self._match(TokenType.NOT)) 1585 and self._match(TokenType.EXISTS) 1586 ) 1587 1588 def _parse_create(self) -> exp.Create | exp.Command: 1589 # Note: this can't be None because we've matched a statement parser 1590 start = self._prev 1591 comments = self._prev_comments 1592 1593 replace = ( 1594 start.token_type == TokenType.REPLACE 1595 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1596 or self._match_pair(TokenType.OR, TokenType.ALTER) 1597 ) 1598 1599 unique = self._match(TokenType.UNIQUE) 1600 
1601 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1602 self._advance() 1603 1604 properties = None 1605 create_token = self._match_set(self.CREATABLES) and self._prev 1606 1607 if not create_token: 1608 # exp.Properties.Location.POST_CREATE 1609 properties = self._parse_properties() 1610 create_token = self._match_set(self.CREATABLES) and self._prev 1611 1612 if not properties or not create_token: 1613 return self._parse_as_command(start) 1614 1615 exists = self._parse_exists(not_=True) 1616 this = None 1617 expression: t.Optional[exp.Expression] = None 1618 indexes = None 1619 no_schema_binding = None 1620 begin = None 1621 end = None 1622 clone = None 1623 1624 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1625 nonlocal properties 1626 if properties and temp_props: 1627 properties.expressions.extend(temp_props.expressions) 1628 elif temp_props: 1629 properties = temp_props 1630 1631 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1632 this = self._parse_user_defined_function(kind=create_token.token_type) 1633 1634 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1635 extend_props(self._parse_properties()) 1636 1637 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1638 1639 if not expression: 1640 if self._match(TokenType.COMMAND): 1641 expression = self._parse_as_command(self._prev) 1642 else: 1643 begin = self._match(TokenType.BEGIN) 1644 return_ = self._match_text_seq("RETURN") 1645 1646 if self._match(TokenType.STRING, advance=False): 1647 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1648 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1649 expression = self._parse_string() 1650 extend_props(self._parse_properties()) 1651 else: 1652 expression = self._parse_statement() 1653 1654 end = self._match_text_seq("END") 1655 1656 if 
return_: 1657 expression = self.expression(exp.Return, this=expression) 1658 elif create_token.token_type == TokenType.INDEX: 1659 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1660 if not self._match(TokenType.ON): 1661 index = self._parse_id_var() 1662 anonymous = False 1663 else: 1664 index = None 1665 anonymous = True 1666 1667 this = self._parse_index(index=index, anonymous=anonymous) 1668 elif create_token.token_type in self.DB_CREATABLES: 1669 table_parts = self._parse_table_parts( 1670 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1671 ) 1672 1673 # exp.Properties.Location.POST_NAME 1674 self._match(TokenType.COMMA) 1675 extend_props(self._parse_properties(before=True)) 1676 1677 this = self._parse_schema(this=table_parts) 1678 1679 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1680 extend_props(self._parse_properties()) 1681 1682 self._match(TokenType.ALIAS) 1683 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1684 # exp.Properties.Location.POST_ALIAS 1685 extend_props(self._parse_properties()) 1686 1687 if create_token.token_type == TokenType.SEQUENCE: 1688 expression = self._parse_types() 1689 extend_props(self._parse_properties()) 1690 else: 1691 expression = self._parse_ddl_select() 1692 1693 if create_token.token_type == TokenType.TABLE: 1694 # exp.Properties.Location.POST_EXPRESSION 1695 extend_props(self._parse_properties()) 1696 1697 indexes = [] 1698 while True: 1699 index = self._parse_index() 1700 1701 # exp.Properties.Location.POST_INDEX 1702 extend_props(self._parse_properties()) 1703 1704 if not index: 1705 break 1706 else: 1707 self._match(TokenType.COMMA) 1708 indexes.append(index) 1709 elif create_token.token_type == TokenType.VIEW: 1710 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1711 no_schema_binding = True 1712 1713 shallow = self._match_text_seq("SHALLOW") 1714 1715 if self._match_texts(self.CLONE_KEYWORDS): 1716 copy = 
self._prev.text.lower() == "copy" 1717 clone = self.expression( 1718 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1719 ) 1720 1721 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1722 return self._parse_as_command(start) 1723 1724 return self.expression( 1725 exp.Create, 1726 comments=comments, 1727 this=this, 1728 kind=create_token.text.upper(), 1729 replace=replace, 1730 unique=unique, 1731 expression=expression, 1732 exists=exists, 1733 properties=properties, 1734 indexes=indexes, 1735 no_schema_binding=no_schema_binding, 1736 begin=begin, 1737 end=end, 1738 clone=clone, 1739 ) 1740 1741 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1742 seq = exp.SequenceProperties() 1743 1744 options = [] 1745 index = self._index 1746 1747 while self._curr: 1748 self._match(TokenType.COMMA) 1749 if self._match_text_seq("INCREMENT"): 1750 self._match_text_seq("BY") 1751 self._match_text_seq("=") 1752 seq.set("increment", self._parse_term()) 1753 elif self._match_text_seq("MINVALUE"): 1754 seq.set("minvalue", self._parse_term()) 1755 elif self._match_text_seq("MAXVALUE"): 1756 seq.set("maxvalue", self._parse_term()) 1757 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1758 self._match_text_seq("=") 1759 seq.set("start", self._parse_term()) 1760 elif self._match_text_seq("CACHE"): 1761 # T-SQL allows empty CACHE which is initialized dynamically 1762 seq.set("cache", self._parse_number() or True) 1763 elif self._match_text_seq("OWNED", "BY"): 1764 # "OWNED BY NONE" is the default 1765 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1766 else: 1767 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1768 if opt: 1769 options.append(opt) 1770 else: 1771 break 1772 1773 seq.set("options", options if options else None) 1774 return None if self._index == index else seq 1775 1776 def 
_parse_property_before(self) -> t.Optional[exp.Expression]: 1777 # only used for teradata currently 1778 self._match(TokenType.COMMA) 1779 1780 kwargs = { 1781 "no": self._match_text_seq("NO"), 1782 "dual": self._match_text_seq("DUAL"), 1783 "before": self._match_text_seq("BEFORE"), 1784 "default": self._match_text_seq("DEFAULT"), 1785 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1786 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1787 "after": self._match_text_seq("AFTER"), 1788 "minimum": self._match_texts(("MIN", "MINIMUM")), 1789 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1790 } 1791 1792 if self._match_texts(self.PROPERTY_PARSERS): 1793 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1794 try: 1795 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1796 except TypeError: 1797 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1798 1799 return None 1800 1801 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1802 return self._parse_wrapped_csv(self._parse_property) 1803 1804 def _parse_property(self) -> t.Optional[exp.Expression]: 1805 if self._match_texts(self.PROPERTY_PARSERS): 1806 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1807 1808 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1809 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1810 1811 if self._match_text_seq("COMPOUND", "SORTKEY"): 1812 return self._parse_sortkey(compound=True) 1813 1814 if self._match_text_seq("SQL", "SECURITY"): 1815 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1816 1817 index = self._index 1818 key = self._parse_column() 1819 1820 if not self._match(TokenType.EQ): 1821 self._retreat(index) 1822 return self._parse_sequence_properties() 1823 1824 return self.expression( 1825 exp.Property, 1826 this=key.to_dot() if isinstance(key, exp.Column) else key, 1827 value=self._parse_bitwise() or 
self._parse_var(any_token=True), 1828 ) 1829 1830 def _parse_stored(self) -> exp.FileFormatProperty: 1831 self._match(TokenType.ALIAS) 1832 1833 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1834 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1835 1836 return self.expression( 1837 exp.FileFormatProperty, 1838 this=( 1839 self.expression( 1840 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1841 ) 1842 if input_format or output_format 1843 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1844 ), 1845 ) 1846 1847 def _parse_unquoted_field(self): 1848 field = self._parse_field() 1849 if isinstance(field, exp.Identifier) and not field.quoted: 1850 field = exp.var(field) 1851 1852 return field 1853 1854 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1855 self._match(TokenType.EQ) 1856 self._match(TokenType.ALIAS) 1857 1858 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1859 1860 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1861 properties = [] 1862 while True: 1863 if before: 1864 prop = self._parse_property_before() 1865 else: 1866 prop = self._parse_property() 1867 if not prop: 1868 break 1869 for p in ensure_list(prop): 1870 properties.append(p) 1871 1872 if properties: 1873 return self.expression(exp.Properties, expressions=properties) 1874 1875 return None 1876 1877 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1878 return self.expression( 1879 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1880 ) 1881 1882 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1883 if self._index >= 2: 1884 pre_volatile_token = self._tokens[self._index - 2] 1885 else: 1886 pre_volatile_token = None 1887 1888 if pre_volatile_token and 
pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1889 return exp.VolatileProperty() 1890 1891 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1892 1893 def _parse_retention_period(self) -> exp.Var: 1894 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1895 number = self._parse_number() 1896 number_str = f"{number} " if number else "" 1897 unit = self._parse_var(any_token=True) 1898 return exp.var(f"{number_str}{unit}") 1899 1900 def _parse_system_versioning_property( 1901 self, with_: bool = False 1902 ) -> exp.WithSystemVersioningProperty: 1903 self._match(TokenType.EQ) 1904 prop = self.expression( 1905 exp.WithSystemVersioningProperty, 1906 **{ # type: ignore 1907 "on": True, 1908 "with": with_, 1909 }, 1910 ) 1911 1912 if self._match_text_seq("OFF"): 1913 prop.set("on", False) 1914 return prop 1915 1916 self._match(TokenType.ON) 1917 if self._match(TokenType.L_PAREN): 1918 while self._curr and not self._match(TokenType.R_PAREN): 1919 if self._match_text_seq("HISTORY_TABLE", "="): 1920 prop.set("this", self._parse_table_parts()) 1921 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1922 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1923 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1924 prop.set("retention_period", self._parse_retention_period()) 1925 1926 self._match(TokenType.COMMA) 1927 1928 return prop 1929 1930 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1931 self._match(TokenType.EQ) 1932 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1933 prop = self.expression(exp.DataDeletionProperty, on=on) 1934 1935 if self._match(TokenType.L_PAREN): 1936 while self._curr and not self._match(TokenType.R_PAREN): 1937 if self._match_text_seq("FILTER_COLUMN", "="): 1938 prop.set("filter_column", self._parse_column()) 1939 elif self._match_text_seq("RETENTION_PERIOD", "="): 1940 
prop.set("retention_period", self._parse_retention_period()) 1941 1942 self._match(TokenType.COMMA) 1943 1944 return prop 1945 1946 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1947 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1948 prop = self._parse_system_versioning_property(with_=True) 1949 self._match_r_paren() 1950 return prop 1951 1952 if self._match(TokenType.L_PAREN, advance=False): 1953 return self._parse_wrapped_properties() 1954 1955 if self._match_text_seq("JOURNAL"): 1956 return self._parse_withjournaltable() 1957 1958 if self._match_texts(self.VIEW_ATTRIBUTES): 1959 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1960 1961 if self._match_text_seq("DATA"): 1962 return self._parse_withdata(no=False) 1963 elif self._match_text_seq("NO", "DATA"): 1964 return self._parse_withdata(no=True) 1965 1966 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1967 return self._parse_serde_properties(with_=True) 1968 1969 if not self._next: 1970 return None 1971 1972 return self._parse_withisolatedloading() 1973 1974 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1975 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1976 self._match(TokenType.EQ) 1977 1978 user = self._parse_id_var() 1979 self._match(TokenType.PARAMETER) 1980 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1981 1982 if not user or not host: 1983 return None 1984 1985 return exp.DefinerProperty(this=f"{user}@{host}") 1986 1987 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1988 self._match(TokenType.TABLE) 1989 self._match(TokenType.EQ) 1990 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1991 1992 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1993 return self.expression(exp.LogProperty, no=no) 1994 1995 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1996 return 
self.expression(exp.JournalProperty, **kwargs) 1997 1998 def _parse_checksum(self) -> exp.ChecksumProperty: 1999 self._match(TokenType.EQ) 2000 2001 on = None 2002 if self._match(TokenType.ON): 2003 on = True 2004 elif self._match_text_seq("OFF"): 2005 on = False 2006 2007 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2008 2009 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2010 return self.expression( 2011 exp.Cluster, 2012 expressions=( 2013 self._parse_wrapped_csv(self._parse_ordered) 2014 if wrapped 2015 else self._parse_csv(self._parse_ordered) 2016 ), 2017 ) 2018 2019 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2020 self._match_text_seq("BY") 2021 2022 self._match_l_paren() 2023 expressions = self._parse_csv(self._parse_column) 2024 self._match_r_paren() 2025 2026 if self._match_text_seq("SORTED", "BY"): 2027 self._match_l_paren() 2028 sorted_by = self._parse_csv(self._parse_ordered) 2029 self._match_r_paren() 2030 else: 2031 sorted_by = None 2032 2033 self._match(TokenType.INTO) 2034 buckets = self._parse_number() 2035 self._match_text_seq("BUCKETS") 2036 2037 return self.expression( 2038 exp.ClusteredByProperty, 2039 expressions=expressions, 2040 sorted_by=sorted_by, 2041 buckets=buckets, 2042 ) 2043 2044 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2045 if not self._match_text_seq("GRANTS"): 2046 self._retreat(self._index - 1) 2047 return None 2048 2049 return self.expression(exp.CopyGrantsProperty) 2050 2051 def _parse_freespace(self) -> exp.FreespaceProperty: 2052 self._match(TokenType.EQ) 2053 return self.expression( 2054 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2055 ) 2056 2057 def _parse_mergeblockratio( 2058 self, no: bool = False, default: bool = False 2059 ) -> exp.MergeBlockRatioProperty: 2060 if self._match(TokenType.EQ): 2061 return self.expression( 2062 exp.MergeBlockRatioProperty, 2063 
this=self._parse_number(), 2064 percent=self._match(TokenType.PERCENT), 2065 ) 2066 2067 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2068 2069 def _parse_datablocksize( 2070 self, 2071 default: t.Optional[bool] = None, 2072 minimum: t.Optional[bool] = None, 2073 maximum: t.Optional[bool] = None, 2074 ) -> exp.DataBlocksizeProperty: 2075 self._match(TokenType.EQ) 2076 size = self._parse_number() 2077 2078 units = None 2079 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2080 units = self._prev.text 2081 2082 return self.expression( 2083 exp.DataBlocksizeProperty, 2084 size=size, 2085 units=units, 2086 default=default, 2087 minimum=minimum, 2088 maximum=maximum, 2089 ) 2090 2091 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2092 self._match(TokenType.EQ) 2093 always = self._match_text_seq("ALWAYS") 2094 manual = self._match_text_seq("MANUAL") 2095 never = self._match_text_seq("NEVER") 2096 default = self._match_text_seq("DEFAULT") 2097 2098 autotemp = None 2099 if self._match_text_seq("AUTOTEMP"): 2100 autotemp = self._parse_schema() 2101 2102 return self.expression( 2103 exp.BlockCompressionProperty, 2104 always=always, 2105 manual=manual, 2106 never=never, 2107 default=default, 2108 autotemp=autotemp, 2109 ) 2110 2111 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2112 index = self._index 2113 no = self._match_text_seq("NO") 2114 concurrent = self._match_text_seq("CONCURRENT") 2115 2116 if not self._match_text_seq("ISOLATED", "LOADING"): 2117 self._retreat(index) 2118 return None 2119 2120 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2121 return self.expression( 2122 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2123 ) 2124 2125 def _parse_locking(self) -> exp.LockingProperty: 2126 if self._match(TokenType.TABLE): 2127 kind = "TABLE" 2128 elif self._match(TokenType.VIEW): 2129 kind = "VIEW" 2130 
elif self._match(TokenType.ROW): 2131 kind = "ROW" 2132 elif self._match_text_seq("DATABASE"): 2133 kind = "DATABASE" 2134 else: 2135 kind = None 2136 2137 if kind in ("DATABASE", "TABLE", "VIEW"): 2138 this = self._parse_table_parts() 2139 else: 2140 this = None 2141 2142 if self._match(TokenType.FOR): 2143 for_or_in = "FOR" 2144 elif self._match(TokenType.IN): 2145 for_or_in = "IN" 2146 else: 2147 for_or_in = None 2148 2149 if self._match_text_seq("ACCESS"): 2150 lock_type = "ACCESS" 2151 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2152 lock_type = "EXCLUSIVE" 2153 elif self._match_text_seq("SHARE"): 2154 lock_type = "SHARE" 2155 elif self._match_text_seq("READ"): 2156 lock_type = "READ" 2157 elif self._match_text_seq("WRITE"): 2158 lock_type = "WRITE" 2159 elif self._match_text_seq("CHECKSUM"): 2160 lock_type = "CHECKSUM" 2161 else: 2162 lock_type = None 2163 2164 override = self._match_text_seq("OVERRIDE") 2165 2166 return self.expression( 2167 exp.LockingProperty, 2168 this=this, 2169 kind=kind, 2170 for_or_in=for_or_in, 2171 lock_type=lock_type, 2172 override=override, 2173 ) 2174 2175 def _parse_partition_by(self) -> t.List[exp.Expression]: 2176 if self._match(TokenType.PARTITION_BY): 2177 return self._parse_csv(self._parse_conjunction) 2178 return [] 2179 2180 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2181 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2182 if self._match_text_seq("MINVALUE"): 2183 return exp.var("MINVALUE") 2184 if self._match_text_seq("MAXVALUE"): 2185 return exp.var("MAXVALUE") 2186 return self._parse_bitwise() 2187 2188 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2189 expression = None 2190 from_expressions = None 2191 to_expressions = None 2192 2193 if self._match(TokenType.IN): 2194 this = self._parse_wrapped_csv(self._parse_bitwise) 2195 elif self._match(TokenType.FROM): 2196 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2197 
self._match_text_seq("TO") 2198 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2199 elif self._match_text_seq("WITH", "(", "MODULUS"): 2200 this = self._parse_number() 2201 self._match_text_seq(",", "REMAINDER") 2202 expression = self._parse_number() 2203 self._match_r_paren() 2204 else: 2205 self.raise_error("Failed to parse partition bound spec.") 2206 2207 return self.expression( 2208 exp.PartitionBoundSpec, 2209 this=this, 2210 expression=expression, 2211 from_expressions=from_expressions, 2212 to_expressions=to_expressions, 2213 ) 2214 2215 # https://www.postgresql.org/docs/current/sql-createtable.html 2216 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2217 if not self._match_text_seq("OF"): 2218 self._retreat(self._index - 1) 2219 return None 2220 2221 this = self._parse_table(schema=True) 2222 2223 if self._match(TokenType.DEFAULT): 2224 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2225 elif self._match_text_seq("FOR", "VALUES"): 2226 expression = self._parse_partition_bound_spec() 2227 else: 2228 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2229 2230 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2231 2232 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2233 self._match(TokenType.EQ) 2234 return self.expression( 2235 exp.PartitionedByProperty, 2236 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2237 ) 2238 2239 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2240 if self._match_text_seq("AND", "STATISTICS"): 2241 statistics = True 2242 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2243 statistics = False 2244 else: 2245 statistics = None 2246 2247 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2248 2249 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2250 if self._match_text_seq("SQL"): 2251 return 
self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2252 return None 2253 2254 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2255 if self._match_text_seq("SQL", "DATA"): 2256 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2257 return None 2258 2259 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2260 if self._match_text_seq("PRIMARY", "INDEX"): 2261 return exp.NoPrimaryIndexProperty() 2262 if self._match_text_seq("SQL"): 2263 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2264 return None 2265 2266 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2267 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2268 return exp.OnCommitProperty() 2269 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2270 return exp.OnCommitProperty(delete=True) 2271 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2272 2273 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2274 if self._match_text_seq("SQL", "DATA"): 2275 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2276 return None 2277 2278 def _parse_distkey(self) -> exp.DistKeyProperty: 2279 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2280 2281 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2282 table = self._parse_table(schema=True) 2283 2284 options = [] 2285 while self._match_texts(("INCLUDING", "EXCLUDING")): 2286 this = self._prev.text.upper() 2287 2288 id_var = self._parse_id_var() 2289 if not id_var: 2290 return None 2291 2292 options.append( 2293 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2294 ) 2295 2296 return self.expression(exp.LikeProperty, this=table, expressions=options) 2297 2298 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2299 return self.expression( 2300 exp.SortKeyProperty, 
    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse a CHARACTER SET property value, optionally preceded by `=`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <qualified name>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a scalar type, TABLE [<columns>], or NULL ON NULL INPUT."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> -- angle-bracketed column list
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # RETURNS TABLE (col type, ...) or a bare RETURNS TABLE
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement with optional creatable kind and display style."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot right after the "style" word means that word was actually the first
            # part of a qualified table name, so back up two tokens and re-parse it.
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including INSERT OVERWRITE [LOCAL] DIRECTORY."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / INSERT OR IGNORE
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        # RETURNING is attempted both before and after the source expression;
        # whichever position matched is kept below.
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # ON CONFLICT accepts either a named constraint or a key-column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE [SET] a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )
conflict and not duplicate: 2418 return None 2419 2420 conflict_keys = None 2421 constraint = None 2422 2423 if conflict: 2424 if self._match_text_seq("ON", "CONSTRAINT"): 2425 constraint = self._parse_id_var() 2426 elif self._match(TokenType.L_PAREN): 2427 conflict_keys = self._parse_csv(self._parse_id_var) 2428 self._match_r_paren() 2429 2430 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2431 if self._prev.token_type == TokenType.UPDATE: 2432 self._match(TokenType.SET) 2433 expressions = self._parse_csv(self._parse_equality) 2434 else: 2435 expressions = None 2436 2437 return self.expression( 2438 exp.OnConflict, 2439 duplicate=duplicate, 2440 expressions=expressions, 2441 action=action, 2442 conflict_keys=conflict_keys, 2443 constraint=constraint, 2444 ) 2445 2446 def _parse_returning(self) -> t.Optional[exp.Returning]: 2447 if not self._match(TokenType.RETURNING): 2448 return None 2449 return self.expression( 2450 exp.Returning, 2451 expressions=self._parse_csv(self._parse_expression), 2452 into=self._match(TokenType.INTO) and self._parse_table_part(), 2453 ) 2454 2455 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2456 if not self._match(TokenType.FORMAT): 2457 return None 2458 return self._parse_row_format() 2459 2460 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2461 index = self._index 2462 with_ = with_ or self._match_text_seq("WITH") 2463 2464 if not self._match(TokenType.SERDE_PROPERTIES): 2465 self._retreat(index) 2466 return None 2467 return self.expression( 2468 exp.SerdeProperties, 2469 **{ # type: ignore 2470 "expressions": self._parse_wrapped_properties(), 2471 "with": with_, 2472 }, 2473 ) 2474 2475 def _parse_row_format( 2476 self, match_row: bool = False 2477 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2478 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2479 return None 2480 2481 
if self._match_text_seq("SERDE"): 2482 this = self._parse_string() 2483 2484 serde_properties = self._parse_serde_properties() 2485 2486 return self.expression( 2487 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2488 ) 2489 2490 self._match_text_seq("DELIMITED") 2491 2492 kwargs = {} 2493 2494 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2495 kwargs["fields"] = self._parse_string() 2496 if self._match_text_seq("ESCAPED", "BY"): 2497 kwargs["escaped"] = self._parse_string() 2498 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2499 kwargs["collection_items"] = self._parse_string() 2500 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2501 kwargs["map_keys"] = self._parse_string() 2502 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2503 kwargs["lines"] = self._parse_string() 2504 if self._match_text_seq("NULL", "DEFINED", "AS"): 2505 kwargs["null"] = self._parse_string() 2506 2507 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2508 2509 def _parse_load(self) -> exp.LoadData | exp.Command: 2510 if self._match_text_seq("DATA"): 2511 local = self._match_text_seq("LOCAL") 2512 self._match_text_seq("INPATH") 2513 inpath = self._parse_string() 2514 overwrite = self._match(TokenType.OVERWRITE) 2515 self._match_pair(TokenType.INTO, TokenType.TABLE) 2516 2517 return self.expression( 2518 exp.LoadData, 2519 this=self._parse_table(schema=True), 2520 local=local, 2521 overwrite=overwrite, 2522 inpath=inpath, 2523 partition=self._parse_partition(), 2524 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2525 serde=self._match_text_seq("SERDE") and self._parse_string(), 2526 ) 2527 return self._parse_as_command(self._prev) 2528 2529 def _parse_delete(self) -> exp.Delete: 2530 # This handles MySQL's "Multiple-Table Syntax" 2531 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2532 tables = None 2533 comments = self._prev_comments 2534 if not 
self._match(TokenType.FROM, advance=False): 2535 tables = self._parse_csv(self._parse_table) or None 2536 2537 returning = self._parse_returning() 2538 2539 return self.expression( 2540 exp.Delete, 2541 comments=comments, 2542 tables=tables, 2543 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2544 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2545 where=self._parse_where(), 2546 returning=returning or self._parse_returning(), 2547 limit=self._parse_limit(), 2548 ) 2549 2550 def _parse_update(self) -> exp.Update: 2551 comments = self._prev_comments 2552 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2553 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2554 returning = self._parse_returning() 2555 return self.expression( 2556 exp.Update, 2557 comments=comments, 2558 **{ # type: ignore 2559 "this": this, 2560 "expressions": expressions, 2561 "from": self._parse_from(joins=True), 2562 "where": self._parse_where(), 2563 "returning": returning or self._parse_returning(), 2564 "order": self._parse_order(), 2565 "limit": self._parse_limit(), 2566 }, 2567 ) 2568 2569 def _parse_uncache(self) -> exp.Uncache: 2570 if not self._match(TokenType.TABLE): 2571 self.raise_error("Expecting TABLE after UNCACHE") 2572 2573 return self.expression( 2574 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2575 ) 2576 2577 def _parse_cache(self) -> exp.Cache: 2578 lazy = self._match_text_seq("LAZY") 2579 self._match(TokenType.TABLE) 2580 table = self._parse_table(schema=True) 2581 2582 options = [] 2583 if self._match_text_seq("OPTIONS"): 2584 self._match_l_paren() 2585 k = self._parse_string() 2586 self._match(TokenType.EQ) 2587 v = self._parse_string() 2588 options = [k, v] 2589 self._match_r_paren() 2590 2591 self._match(TokenType.ALIAS) 2592 return self.expression( 2593 exp.Cache, 2594 this=table, 2595 lazy=lazy, 2596 options=options, 2597 
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr>, ...), or return None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        """Parse one VALUES row: a parenthesized expression list or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a query: optional WITH prefix, then a SELECT, a parenthesized
        query/table, a VALUES clause, or a leading FROM (duckdb)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated list of CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate an extra WITH keyword before the next CTE.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(cols)] AS [[NOT] MATERIALIZED] (<statement>)."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)], returning None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Keep the parenthesized part only if it actually contained columns;
            # otherwise rewind so the paren can be consumed by another parser.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, attaching pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicitly unnested joined tables (e.g. `FROM t, t.arr`) into explicit
        UNNEST(...) nodes, for dialects with SUPPORTS_IMPLICIT_UNNEST."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names that subsequent join operands may implicitly reference.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT/...)
        to a parsed query or table expression."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # Move a nested offset (and any expressions parsed into the
                            # Limit node) onto a dedicated Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ into an exp.Hint."""
        if self._match(TokenType.HINT):
            hints = []
            # Hints may come as several comma-separated lists; keep collecting
            # until a pass yields nothing.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; `skip_from_token` assumes FROM was already consumed."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry: [FINAL | RUNNING] <expression>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause and its optional alias."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match mode is kept as a verbatim variable string.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern body is captured verbatim: scan tokens until the
            # parentheses balance out, then recover the raw SQL slice.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL ..., CROSS APPLY ... or OUTER APPLY ...."""
        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL.
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: accept an unnest, a (possibly dotted) function
            # call, or a plain identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional join method, side and kind tokens, in that order."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN operand (including comma joins and CROSS/OUTER APPLY)."""
        if self._match(TokenType.COMMA):
            # A bare comma acts as an implicit join.
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed, so the tokens consumed above were not
            # join qualifiers after all -- rewind and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Speculatively parse nested joins so a trailing ON/USING can attach
            # to this join (e.g. `a JOIN b JOIN c ON ...`); rewind if none follows.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index column expression with an optional operator-class suffix."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of an index definition (USING, columns,
        INCLUDE, PARTITION BY, WITH storage options, TABLESPACE, WHERE)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        `index` is a pre-parsed index name; `anonymous` skips name parsing entirely.
        In either of those cases only the ON <table> part is parsed here.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a dot-separated reference into catalog/db/table parts."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Fold a trailing `*` into the table identifier (e.g. `db.tbl_*`).
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # The whole reference names a database, so shift the parts up one slot.
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table factor: a lateral, unnest, VALUES list, subquery or
        plain (possibly qualified) table, plus its trailing decorations."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # The dialect decides whether TABLESAMPLE comes before or after the alias.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a table snapshot (time-travel) clause, if one follows."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            # FROM <start> TO <end> / BETWEEN <start> AND <end>
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
self._match(TokenType.TIMESTAMP_SNAPSHOT): 3372 this = "TIMESTAMP" 3373 elif self._match(TokenType.VERSION_SNAPSHOT): 3374 this = "VERSION" 3375 else: 3376 return None 3377 3378 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3379 kind = self._prev.text.upper() 3380 start = self._parse_bitwise() 3381 self._match_texts(("TO", "AND")) 3382 end = self._parse_bitwise() 3383 expression: t.Optional[exp.Expression] = self.expression( 3384 exp.Tuple, expressions=[start, end] 3385 ) 3386 elif self._match_text_seq("CONTAINED", "IN"): 3387 kind = "CONTAINED IN" 3388 expression = self.expression( 3389 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3390 ) 3391 elif self._match(TokenType.ALL): 3392 kind = "ALL" 3393 expression = None 3394 else: 3395 self._match_text_seq("AS", "OF") 3396 kind = "AS OF" 3397 expression = self._parse_type() 3398 3399 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3400 3401 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3402 if not self._match(TokenType.UNNEST): 3403 return None 3404 3405 expressions = self._parse_wrapped_csv(self._parse_equality) 3406 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3407 3408 alias = self._parse_table_alias() if with_alias else None 3409 3410 if alias: 3411 if self.dialect.UNNEST_COLUMN_ONLY: 3412 if alias.args.get("columns"): 3413 self.raise_error("Unexpected extra column alias in unnest.") 3414 3415 alias.set("columns", [alias.this]) 3416 alias.set("this", None) 3417 3418 columns = alias.args.get("columns") or [] 3419 if offset and len(expressions) < len(columns): 3420 offset = columns.pop() 3421 3422 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3423 self._match(TokenType.ALIAS) 3424 offset = self._parse_id_var( 3425 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3426 ) or exp.to_identifier("offset") 3427 3428 return self.expression(exp.Unnest, expressions=expressions, 
alias=alias, offset=offset) 3429 3430 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3431 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3432 if not is_derived and not self._match_text_seq("VALUES"): 3433 return None 3434 3435 expressions = self._parse_csv(self._parse_value) 3436 alias = self._parse_table_alias() 3437 3438 if is_derived: 3439 self._match_r_paren() 3440 3441 return self.expression( 3442 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3443 ) 3444 3445 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3446 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3447 as_modifier and self._match_text_seq("USING", "SAMPLE") 3448 ): 3449 return None 3450 3451 bucket_numerator = None 3452 bucket_denominator = None 3453 bucket_field = None 3454 percent = None 3455 size = None 3456 seed = None 3457 3458 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3459 matched_l_paren = self._match(TokenType.L_PAREN) 3460 3461 if self.TABLESAMPLE_CSV: 3462 num = None 3463 expressions = self._parse_csv(self._parse_primary) 3464 else: 3465 expressions = None 3466 num = ( 3467 self._parse_factor() 3468 if self._match(TokenType.NUMBER, advance=False) 3469 else self._parse_primary() or self._parse_placeholder() 3470 ) 3471 3472 if self._match_text_seq("BUCKET"): 3473 bucket_numerator = self._parse_number() 3474 self._match_text_seq("OUT", "OF") 3475 bucket_denominator = bucket_denominator = self._parse_number() 3476 self._match(TokenType.ON) 3477 bucket_field = self._parse_field() 3478 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3479 percent = num 3480 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3481 size = num 3482 else: 3483 percent = num 3484 3485 if matched_l_paren: 3486 self._match_r_paren() 3487 3488 if self._match(TokenType.L_PAREN): 3489 method = self._parse_var(upper=True) 3490 seed = 
self._match(TokenType.COMMA) and self._parse_number() 3491 self._match_r_paren() 3492 elif self._match_texts(("SEED", "REPEATABLE")): 3493 seed = self._parse_wrapped(self._parse_number) 3494 3495 if not method and self.DEFAULT_SAMPLING_METHOD: 3496 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3497 3498 return self.expression( 3499 exp.TableSample, 3500 expressions=expressions, 3501 method=method, 3502 bucket_numerator=bucket_numerator, 3503 bucket_denominator=bucket_denominator, 3504 bucket_field=bucket_field, 3505 percent=percent, 3506 size=size, 3507 seed=seed, 3508 ) 3509 3510 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3511 return list(iter(self._parse_pivot, None)) or None 3512 3513 def _parse_joins(self) -> t.Iterator[exp.Join]: 3514 return iter(self._parse_join, None) 3515 3516 # https://duckdb.org/docs/sql/statements/pivot 3517 def _parse_simplified_pivot(self) -> exp.Pivot: 3518 def _parse_on() -> t.Optional[exp.Expression]: 3519 this = self._parse_bitwise() 3520 return self._parse_in(this) if self._match(TokenType.IN) else this 3521 3522 this = self._parse_table() 3523 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3524 using = self._match(TokenType.USING) and self._parse_csv( 3525 lambda: self._parse_alias(self._parse_function()) 3526 ) 3527 group = self._parse_group() 3528 return self.expression( 3529 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3530 ) 3531 3532 def _parse_pivot_in(self) -> exp.In: 3533 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3534 this = self._parse_conjunction() 3535 3536 self._match(TokenType.ALIAS) 3537 alias = self._parse_field() 3538 if alias: 3539 return self.expression(exp.PivotAlias, this=this, alias=alias) 3540 3541 return this 3542 3543 value = self._parse_column() 3544 3545 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3546 self.raise_error("Expecting IN (") 3547 3548 aliased_expressions = 
self._parse_csv(_parse_aliased_expression) 3549 3550 self._match_r_paren() 3551 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3552 3553 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3554 index = self._index 3555 include_nulls = None 3556 3557 if self._match(TokenType.PIVOT): 3558 unpivot = False 3559 elif self._match(TokenType.UNPIVOT): 3560 unpivot = True 3561 3562 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3563 if self._match_text_seq("INCLUDE", "NULLS"): 3564 include_nulls = True 3565 elif self._match_text_seq("EXCLUDE", "NULLS"): 3566 include_nulls = False 3567 else: 3568 return None 3569 3570 expressions = [] 3571 3572 if not self._match(TokenType.L_PAREN): 3573 self._retreat(index) 3574 return None 3575 3576 if unpivot: 3577 expressions = self._parse_csv(self._parse_column) 3578 else: 3579 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3580 3581 if not expressions: 3582 self.raise_error("Failed to parse PIVOT's aggregation list") 3583 3584 if not self._match(TokenType.FOR): 3585 self.raise_error("Expecting FOR") 3586 3587 field = self._parse_pivot_in() 3588 3589 self._match_r_paren() 3590 3591 pivot = self.expression( 3592 exp.Pivot, 3593 expressions=expressions, 3594 field=field, 3595 unpivot=unpivot, 3596 include_nulls=include_nulls, 3597 ) 3598 3599 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3600 pivot.set("alias", self._parse_table_alias()) 3601 3602 if not unpivot: 3603 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3604 3605 columns: t.List[exp.Expression] = [] 3606 for fld in pivot.args["field"].expressions: 3607 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3608 for name in names: 3609 if self.PREFIXED_PIVOT_COLUMNS: 3610 name = f"{name}_{field_name}" if name else field_name 3611 else: 3612 name = f"{field_name}_{name}" if name else 
field_name 3613 3614 columns.append(exp.to_identifier(name)) 3615 3616 pivot.set("columns", columns) 3617 3618 return pivot 3619 3620 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3621 return [agg.alias for agg in aggregations] 3622 3623 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3624 if not skip_where_token and not self._match(TokenType.PREWHERE): 3625 return None 3626 3627 return self.expression( 3628 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3629 ) 3630 3631 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3632 if not skip_where_token and not self._match(TokenType.WHERE): 3633 return None 3634 3635 return self.expression( 3636 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3637 ) 3638 3639 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3640 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3641 return None 3642 3643 elements: t.Dict[str, t.Any] = defaultdict(list) 3644 3645 if self._match(TokenType.ALL): 3646 elements["all"] = True 3647 elif self._match(TokenType.DISTINCT): 3648 elements["all"] = False 3649 3650 while True: 3651 expressions = self._parse_csv( 3652 lambda: None 3653 if self._match(TokenType.ROLLUP, advance=False) 3654 else self._parse_conjunction() 3655 ) 3656 if expressions: 3657 elements["expressions"].extend(expressions) 3658 3659 grouping_sets = self._parse_grouping_sets() 3660 if grouping_sets: 3661 elements["grouping_sets"].extend(grouping_sets) 3662 3663 rollup = None 3664 cube = None 3665 totals = None 3666 3667 index = self._index 3668 with_ = self._match(TokenType.WITH) 3669 if self._match(TokenType.ROLLUP): 3670 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3671 elements["rollup"].extend(ensure_list(rollup)) 3672 3673 if self._match(TokenType.CUBE): 3674 cube = with_ or 
self._parse_wrapped_csv(self._parse_column) 3675 elements["cube"].extend(ensure_list(cube)) 3676 3677 if self._match_text_seq("TOTALS"): 3678 totals = True 3679 elements["totals"] = True # type: ignore 3680 3681 if not (grouping_sets or rollup or cube or totals): 3682 if with_: 3683 self._retreat(index) 3684 break 3685 3686 return self.expression(exp.Group, **elements) # type: ignore 3687 3688 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3689 if not self._match(TokenType.GROUPING_SETS): 3690 return None 3691 3692 return self._parse_wrapped_csv(self._parse_grouping_set) 3693 3694 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3695 if self._match(TokenType.L_PAREN): 3696 grouping_set = self._parse_csv(self._parse_column) 3697 self._match_r_paren() 3698 return self.expression(exp.Tuple, expressions=grouping_set) 3699 3700 return self._parse_column() 3701 3702 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3703 if not skip_having_token and not self._match(TokenType.HAVING): 3704 return None 3705 return self.expression(exp.Having, this=self._parse_conjunction()) 3706 3707 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3708 if not self._match(TokenType.QUALIFY): 3709 return None 3710 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3711 3712 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3713 if skip_start_token: 3714 start = None 3715 elif self._match(TokenType.START_WITH): 3716 start = self._parse_conjunction() 3717 else: 3718 return None 3719 3720 self._match(TokenType.CONNECT_BY) 3721 nocycle = self._match_text_seq("NOCYCLE") 3722 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3723 exp.Prior, this=self._parse_bitwise() 3724 ) 3725 connect = self._parse_conjunction() 3726 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3727 3728 if not start and self._match(TokenType.START_WITH): 3729 start = 
self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        # Parses `name [AS expression]`, i.e. the alias comes first.
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged when
        neither is present."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: expression [ASC|DESC] [NULLS FIRST|LAST]
        [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # NOTE: `(asc and False)` is deliberate, not dead code — it makes
        # `desc` False when ASC was written explicitly but leaves it None
        # (falsy but distinct) when neither ASC nor DESC appeared, so an
        # explicit ASC can be round-tripped.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering wasn't written, infer it from the dialect's
        # default null-sorting behavior.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] count (or TOP [(]n[)]) and FETCH FIRST/NEXT
        clauses; returns `this` unchanged when none is present."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # ClickHouse-style `LIMIT n BY expr, ...` tail.
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE
        MODE, with optional OF targets and NOWAIT/WAIT/SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operands into `this`."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == 
TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # ALL is the opposite of DISTINCT; absence of both means DISTINCT
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Move modifiers (e.g. ORDER BY / LIMIT) from the last operand
                # up to the union itself.
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse `:=` assignments and AND/OR-style conjunctions."""
        this = self._parse_equality()

        if self._match(TokenType.COLON_EQ):
            this = self.expression(
                exp.PropertyEQ,
                this=this,
                comments=self._prev_comments,
                expression=self._parse_conjunction(),
            )

        while self._match_set(self.CONJUNCTION):
            this = self.expression(
                self.CONJUNCTION[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_equality(),
            )
        return this

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (IN, BETWEEN, LIKE, ... via
        RANGE_PARSERS) plus ISNULL/NOTNULL and IS, handling NOT negation."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL, or a
        boolean literal; rewinds and returns None if none of those follow."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a (sub)query or value
        list in parens/brackets, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward the canonical
        `INTERVAL '<value>' <unit>` shape; rewinds and returns None when the
        tokens don't form an interval."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. 
a "window side") 4089 unit = None 4090 self._retreat(self._index - 1) 4091 4092 this = exp.Literal.string(parts[0]) 4093 unit = self.expression(exp.Var, this=parts[1].upper()) 4094 4095 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4096 unit = self.expression( 4097 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4098 ) 4099 4100 return self.expression(exp.Interval, this=this, unit=unit) 4101 4102 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4103 this = self._parse_term() 4104 4105 while True: 4106 if self._match_set(self.BITWISE): 4107 this = self.expression( 4108 self.BITWISE[self._prev.token_type], 4109 this=this, 4110 expression=self._parse_term(), 4111 ) 4112 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4113 this = self.expression( 4114 exp.DPipe, 4115 this=this, 4116 expression=self._parse_term(), 4117 safe=not self.dialect.STRICT_STRING_CONCAT, 4118 ) 4119 elif self._match(TokenType.DQMARK): 4120 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4121 elif self._match_pair(TokenType.LT, TokenType.LT): 4122 this = self.expression( 4123 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4124 ) 4125 elif self._match_pair(TokenType.GT, TokenType.GT): 4126 this = self.expression( 4127 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4128 ) 4129 else: 4130 break 4131 4132 return this 4133 4134 def _parse_term(self) -> t.Optional[exp.Expression]: 4135 return self._parse_tokens(self._parse_factor, self.TERM) 4136 4137 def _parse_factor(self) -> t.Optional[exp.Expression]: 4138 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4139 this = parse_method() 4140 4141 while self._match_set(self.FACTOR): 4142 this = self.expression( 4143 self.FACTOR[self._prev.token_type], 4144 this=this, 4145 comments=self._prev_comments, 4146 expression=parse_method(), 4147 ) 4148 if isinstance(this, exp.Div): 4149 
this.args["typed"] = self.dialect.TYPED_DIVISION 4150 this.args["safe"] = self.dialect.SAFE_DIVISION 4151 4152 return this 4153 4154 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4155 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4156 4157 def _parse_unary(self) -> t.Optional[exp.Expression]: 4158 if self._match_set(self.UNARY_PARSERS): 4159 return self.UNARY_PARSERS[self._prev.token_type](self) 4160 return self._parse_at_time_zone(self._parse_type()) 4161 4162 def _parse_type( 4163 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4164 ) -> t.Optional[exp.Expression]: 4165 interval = parse_interval and self._parse_interval() 4166 if interval: 4167 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4168 while True: 4169 index = self._index 4170 self._match(TokenType.PLUS) 4171 4172 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4173 self._retreat(index) 4174 break 4175 4176 interval = self.expression( # type: ignore 4177 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4178 ) 4179 4180 return interval 4181 4182 index = self._index 4183 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4184 this = self._parse_column() 4185 4186 if data_type: 4187 if isinstance(this, exp.Literal): 4188 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4189 if parser: 4190 return parser(self, this, data_type) 4191 return self.expression(exp.Cast, this=this, to=data_type) 4192 4193 if not data_type.expressions: 4194 self._retreat(index) 4195 return self._parse_id_var() if fallback_to_identifier else self._parse_column() 4196 4197 return self._parse_column_ops(data_type) 4198 4199 return this and self._parse_column_ops(this) 4200 4201 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4202 this = self._parse_type() 4203 if not this: 4204 return None 4205 4206 if isinstance(this, exp.Column) and not this.table: 
4207 this = exp.var(this.name.upper()) 4208 4209 return self.expression( 4210 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4211 ) 4212 4213 def _parse_types( 4214 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4215 ) -> t.Optional[exp.Expression]: 4216 index = self._index 4217 4218 this: t.Optional[exp.Expression] = None 4219 prefix = self._match_text_seq("SYSUDTLIB", ".") 4220 4221 if not self._match_set(self.TYPE_TOKENS): 4222 identifier = allow_identifiers and self._parse_id_var( 4223 any_token=False, tokens=(TokenType.VAR,) 4224 ) 4225 if identifier: 4226 tokens = self.dialect.tokenize(identifier.name) 4227 4228 if len(tokens) != 1: 4229 self.raise_error("Unexpected identifier", self._prev) 4230 4231 if tokens[0].token_type in self.TYPE_TOKENS: 4232 self._prev = tokens[0] 4233 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4234 type_name = identifier.name 4235 4236 while self._match(TokenType.DOT): 4237 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4238 4239 this = exp.DataType.build(type_name, udt=True) 4240 else: 4241 self._retreat(self._index - 1) 4242 return None 4243 else: 4244 return None 4245 4246 type_token = self._prev.token_type 4247 4248 if type_token == TokenType.PSEUDO_TYPE: 4249 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4250 4251 if type_token == TokenType.OBJECT_IDENTIFIER: 4252 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4253 4254 nested = type_token in self.NESTED_TYPE_TOKENS 4255 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4256 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4257 expressions = None 4258 maybe_func = False 4259 4260 if self._match(TokenType.L_PAREN): 4261 if is_struct: 4262 expressions = self._parse_csv(self._parse_struct_types) 4263 elif nested: 4264 expressions = self._parse_csv( 4265 lambda: self._parse_types( 4266 check_func=check_func, schema=schema, 
allow_identifiers=allow_identifiers 4267 ) 4268 ) 4269 elif type_token in self.ENUM_TYPE_TOKENS: 4270 expressions = self._parse_csv(self._parse_equality) 4271 elif is_aggregate: 4272 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4273 any_token=False, tokens=(TokenType.VAR,) 4274 ) 4275 if not func_or_ident or not self._match(TokenType.COMMA): 4276 return None 4277 expressions = self._parse_csv( 4278 lambda: self._parse_types( 4279 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4280 ) 4281 ) 4282 expressions.insert(0, func_or_ident) 4283 else: 4284 expressions = self._parse_csv(self._parse_type_size) 4285 4286 if not expressions or not self._match(TokenType.R_PAREN): 4287 self._retreat(index) 4288 return None 4289 4290 maybe_func = True 4291 4292 values: t.Optional[t.List[exp.Expression]] = None 4293 4294 if nested and self._match(TokenType.LT): 4295 if is_struct: 4296 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4297 else: 4298 expressions = self._parse_csv( 4299 lambda: self._parse_types( 4300 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4301 ) 4302 ) 4303 4304 if not self._match(TokenType.GT): 4305 self.raise_error("Expecting >") 4306 4307 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4308 values = self._parse_csv(self._parse_conjunction) 4309 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4310 4311 if type_token in self.TIMESTAMPS: 4312 if self._match_text_seq("WITH", "TIME", "ZONE"): 4313 maybe_func = False 4314 tz_type = ( 4315 exp.DataType.Type.TIMETZ 4316 if type_token in self.TIMES 4317 else exp.DataType.Type.TIMESTAMPTZ 4318 ) 4319 this = exp.DataType(this=tz_type, expressions=expressions) 4320 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4321 maybe_func = False 4322 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4323 elif self._match_text_seq("WITHOUT", 
"TIME", "ZONE"): 4324 maybe_func = False 4325 elif type_token == TokenType.INTERVAL: 4326 unit = self._parse_var(upper=True) 4327 if unit: 4328 if self._match_text_seq("TO"): 4329 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4330 4331 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4332 else: 4333 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4334 4335 if maybe_func and check_func: 4336 index2 = self._index 4337 peek = self._parse_string() 4338 4339 if not peek: 4340 self._retreat(index) 4341 return None 4342 4343 self._retreat(index2) 4344 4345 if not this: 4346 if self._match_text_seq("UNSIGNED"): 4347 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4348 if not unsigned_type_token: 4349 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4350 4351 type_token = unsigned_type_token or type_token 4352 4353 this = exp.DataType( 4354 this=exp.DataType.Type[type_token.value], 4355 expressions=expressions, 4356 nested=nested, 4357 values=values, 4358 prefix=prefix, 4359 ) 4360 elif expressions: 4361 this.set("expressions", expressions) 4362 4363 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4364 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4365 4366 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4367 converter = self.TYPE_CONVERTER.get(this.this) 4368 if converter: 4369 this = converter(t.cast(exp.DataType, this)) 4370 4371 return this 4372 4373 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4374 index = self._index 4375 this = ( 4376 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4377 or self._parse_id_var() 4378 ) 4379 self._match(TokenType.COLON) 4380 column_def = self._parse_column_def(this) 4381 4382 if type_required and ( 4383 (isinstance(this, exp.Column) and this.this is column_def) or this 
is column_def 4384 ): 4385 self._retreat(index) 4386 return self._parse_types() 4387 4388 return column_def 4389 4390 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4391 if not self._match_text_seq("AT", "TIME", "ZONE"): 4392 return this 4393 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4394 4395 def _parse_column(self) -> t.Optional[exp.Expression]: 4396 this = self._parse_column_reference() 4397 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4398 4399 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4400 this = self._parse_field() 4401 if ( 4402 not this 4403 and self._match(TokenType.VALUES, advance=False) 4404 and self.VALUES_FOLLOWED_BY_PAREN 4405 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4406 ): 4407 this = self._parse_id_var() 4408 4409 if isinstance(this, exp.Identifier): 4410 # We bubble up comments from the Identifier to the Column 4411 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4412 4413 return this 4414 4415 def _parse_colon_as_json_extract( 4416 self, this: t.Optional[exp.Expression] 4417 ) -> t.Optional[exp.Expression]: 4418 casts = [] 4419 json_path = [] 4420 4421 while self._match(TokenType.COLON): 4422 start_index = self._index 4423 path = self._parse_column_ops(self._parse_field(any_token=True)) 4424 4425 # The cast :: operator has a lower precedence than the extraction operator :, so 4426 # we rearrange the AST appropriately to avoid casting the JSON path 4427 while isinstance(path, exp.Cast): 4428 casts.append(path.to) 4429 path = path.this 4430 4431 if casts: 4432 dcolon_offset = next( 4433 i 4434 for i, t in enumerate(self._tokens[start_index:]) 4435 if t.token_type == TokenType.DCOLON 4436 ) 4437 end_token = self._tokens[start_index + dcolon_offset - 1] 4438 else: 4439 end_token = self._prev 4440 4441 if path: 4442 
    def _parse_colon_as_json_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse a chain of `:`-style JSON path segments into a single exp.JSONExtract.

        Trailing `::type` casts found on a segment are peeled off and re-applied
        around the JSONExtract node, since `::` binds looser than `:` here.
        """
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index
            path = self._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # The path text ends right before the first "::" token.
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Recover the raw SQL text of the segment from the token stream.
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            # Re-apply the peeled casts, innermost first.
            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, ::casts, brackets, dialect ops) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the parts up one level: what was the column becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, `.N` float,
        or a parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot float literal, e.g. `.5`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier.

        `anonymous_func` flips the primary-vs-function precedence so that
        function parsing is attempted first.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in ODBC `{fn ...}` escape syntax."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Core function-call parser.

        Dispatch order: no-paren parsers, no-paren known functions, dialect
        FUNCTION_PARSERS, subquery predicates, then known or anonymous functions.
        Returns None (without consuming) when the upcoming tokens can't be a call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only parenless builtins (e.g. CURRENT_DATE) qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders may optionally accept the dialect as a keyword argument.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original (un-normalized) name for generation.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, EQ, etc.) into exp.PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so the key is a bare identifier.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter: an identifier with an optional column definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            # Name only, no parameter list.
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter (an identifier)."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(x, y) -> expr` or `x -> expr`); otherwise backtrack and
        parse a DISTINCT list or a regular select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Aggregate-argument modifiers that may trail the expression.
        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into exp.Schema, or return
        `this` unchanged when the parens actually start a subquery/CTE."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a schema field: any field token followed by an optional column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints of a column definition onto `this`.

        Returns `this` unchanged when no type and no constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse `x ALIAS expr` / `x MATERIALIZED expr`.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with `(start, increment)` or
        `START ... INCREMENT ...` parameters."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; retreat if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN] (temporal tables).
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — computed column.
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric args, e.g. IDENTITY(1, 1).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4887 4888 identity = self._match_text_seq("IDENTITY") 4889 4890 if self._match(TokenType.L_PAREN): 4891 if self._match(TokenType.START_WITH): 4892 this.set("start", self._parse_bitwise()) 4893 if self._match_text_seq("INCREMENT", "BY"): 4894 this.set("increment", self._parse_bitwise()) 4895 if self._match_text_seq("MINVALUE"): 4896 this.set("minvalue", self._parse_bitwise()) 4897 if self._match_text_seq("MAXVALUE"): 4898 this.set("maxvalue", self._parse_bitwise()) 4899 4900 if self._match_text_seq("CYCLE"): 4901 this.set("cycle", True) 4902 elif self._match_text_seq("NO", "CYCLE"): 4903 this.set("cycle", False) 4904 4905 if not identity: 4906 this.set("expression", self._parse_range()) 4907 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4908 args = self._parse_csv(self._parse_bitwise) 4909 this.set("start", seq_get(args, 0)) 4910 this.set("increment", seq_get(args, 1)) 4911 4912 self._match_r_paren() 4913 4914 return this 4915 4916 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4917 self._match_text_seq("LENGTH") 4918 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4919 4920 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4921 if self._match_text_seq("NULL"): 4922 return self.expression(exp.NotNullColumnConstraint) 4923 if self._match_text_seq("CASESPECIFIC"): 4924 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4925 if self._match_text_seq("FOR", "REPLICATION"): 4926 return self.expression(exp.NotForReplicationColumnConstraint) 4927 return None 4928 4929 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4930 if self._match(TokenType.CONSTRAINT): 4931 this = self._parse_id_var() 4932 else: 4933 this = None 4934 4935 if self._match_texts(self.CONSTRAINT_PARSERS): 4936 return self.expression( 4937 exp.ColumnConstraint, 4938 this=this, 4939 
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed if no CONSTRAINT keyword is present."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function-style checks)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint from the allowed keyword set."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index_type>] [ON CONFLICT ...]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options (ON <event> <action>, DEFERRABLE, etc.)
        as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event name (e.g. DELETE/UPDATE) is whatever token follows ON.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
action = "CASCADE" 5001 elif self._match_text_seq("RESTRICT"): 5002 action = "RESTRICT" 5003 elif self._match_pair(TokenType.SET, TokenType.NULL): 5004 action = "SET NULL" 5005 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5006 action = "SET DEFAULT" 5007 else: 5008 self.raise_error("Invalid key constraint") 5009 5010 options.append(f"ON {on} {action}") 5011 elif self._match_text_seq("NOT", "ENFORCED"): 5012 options.append("NOT ENFORCED") 5013 elif self._match_text_seq("DEFERRABLE"): 5014 options.append("DEFERRABLE") 5015 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5016 options.append("INITIALLY DEFERRED") 5017 elif self._match_text_seq("NORELY"): 5018 options.append("NORELY") 5019 elif self._match_text_seq("MATCH", "FULL"): 5020 options.append("MATCH FULL") 5021 else: 5022 break 5023 5024 return options 5025 5026 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5027 if match and not self._match(TokenType.REFERENCES): 5028 return None 5029 5030 expressions = None 5031 this = self._parse_table(schema=True) 5032 options = self._parse_key_constraint_options() 5033 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5034 5035 def _parse_foreign_key(self) -> exp.ForeignKey: 5036 expressions = self._parse_wrapped_id_vars() 5037 reference = self._parse_references() 5038 options = {} 5039 5040 while self._match(TokenType.ON): 5041 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5042 self.raise_error("Expected DELETE or UPDATE") 5043 5044 kind = self._prev.text.lower() 5045 5046 if self._match_text_seq("NO", "ACTION"): 5047 action = "NO ACTION" 5048 elif self._match(TokenType.SET): 5049 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5050 action = "SET " + self._prev.text.upper() 5051 else: 5052 self._advance() 5053 action = self._prev.text.upper() 5054 5055 options[kind] = action 5056 5057 return self.expression( 5058 exp.ForeignKey, 5059 expressions=expressions, 5060 
reference=reference, 5061 **options, # type: ignore 5062 ) 5063 5064 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5065 return self._parse_field() 5066 5067 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5068 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5069 self._retreat(self._index - 1) 5070 return None 5071 5072 id_vars = self._parse_wrapped_id_vars() 5073 return self.expression( 5074 exp.PeriodForSystemTimeConstraint, 5075 this=seq_get(id_vars, 0), 5076 expression=seq_get(id_vars, 1), 5077 ) 5078 5079 def _parse_primary_key( 5080 self, wrapped_optional: bool = False, in_props: bool = False 5081 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5082 desc = ( 5083 self._match_set((TokenType.ASC, TokenType.DESC)) 5084 and self._prev.token_type == TokenType.DESC 5085 ) 5086 5087 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5088 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5089 5090 expressions = self._parse_wrapped_csv( 5091 self._parse_primary_key_part, optional=wrapped_optional 5092 ) 5093 options = self._parse_key_constraint_options() 5094 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5095 5096 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5097 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5098 5099 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5100 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5101 return this 5102 5103 bracket_kind = self._prev.token_type 5104 expressions = self._parse_csv( 5105 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5106 ) 5107 5108 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5109 self.raise_error("Expected ]") 5110 elif bracket_kind == TokenType.L_BRACE and not 
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a `:` slice following `this` inside brackets."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # The closing END was consumed as an interval unit (e.g. `ELSE interval END`);
                # recover by treating the default as a column named "interval".
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either as a function call IF(...) or as statement-style
        IF <cond> THEN <expr> [ELSE <expr>] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as an opaque command in this dialect.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; retreat on mismatch."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) (comma also accepted as separator)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST(expr AS type [FORMAT fmt [AT TIME ZONE tz]]).

        `strict` selects exp.Cast vs exp.TryCast; `safe` is stored on the node.
        A FORMAT clause on a temporal target type turns the cast into
        StrToDate/StrToTime instead.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unrecognized type name -> user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style aggregates into exp.GroupConcat, handling the
        Postgres/BigQuery argument forms and the WITHIN GROUP (ORDER BY ...) form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse the body of CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: binary-to-string decoding.
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # DECODE treats NULL = NULL as a match, so emulate with IS NULL checks.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
5308 """ 5309 args = self._parse_csv(self._parse_conjunction) 5310 5311 if len(args) < 3: 5312 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5313 5314 expression, *expressions = args 5315 if not expression: 5316 return None 5317 5318 ifs = [] 5319 for search, result in zip(expressions[::2], expressions[1::2]): 5320 if not search or not result: 5321 return None 5322 5323 if isinstance(search, exp.Literal): 5324 ifs.append( 5325 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5326 ) 5327 elif isinstance(search, exp.Null): 5328 ifs.append( 5329 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5330 ) 5331 else: 5332 cond = exp.or_( 5333 exp.EQ(this=expression.copy(), expression=search), 5334 exp.and_( 5335 exp.Is(this=expression.copy(), expression=exp.Null()), 5336 exp.Is(this=search.copy(), expression=exp.Null()), 5337 copy=False, 5338 ), 5339 copy=False, 5340 ) 5341 ifs.append(exp.If(this=cond, true=result)) 5342 5343 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5344 5345 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5346 self._match_text_seq("KEY") 5347 key = self._parse_column() 5348 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5349 self._match_text_seq("VALUE") 5350 value = self._parse_bitwise() 5351 5352 if not key and not value: 5353 return None 5354 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5355 5356 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5357 if not this or not self._match_text_seq("FORMAT", "JSON"): 5358 return this 5359 5360 return self.expression(exp.FormatJson, this=this) 5361 5362 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5363 # Parses the "X ON Y" syntax, i.e. 
    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments and trailing clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # [NULL | ABSENT] ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # [WITH | WITHOUT] UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        # RETURNING <type> [FORMAT JSON] and ENCODING <var>
        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED [PATH ...] COLUMNS(...) -- a nested schema instead of a column
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(<json_column_def>, ...) schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(doc [, path] [... ON ERROR] [... ON EMPTY] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL full-text search: MATCH (col, ...) AGAINST (str [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        # The MATCH( was consumed upstream; bridge over `) AGAINST (` here
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type [path] [AS JSON]` entry of the WITH clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
return self.expression( 5485 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5486 ) 5487 5488 expressions = None 5489 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5490 self._match_l_paren() 5491 expressions = self._parse_csv(_parse_open_json_column_def) 5492 5493 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5494 5495 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5496 args = self._parse_csv(self._parse_bitwise) 5497 5498 if self._match(TokenType.IN): 5499 return self.expression( 5500 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5501 ) 5502 5503 if haystack_first: 5504 haystack = seq_get(args, 0) 5505 needle = seq_get(args, 1) 5506 else: 5507 needle = seq_get(args, 0) 5508 haystack = seq_get(args, 1) 5509 5510 return self.expression( 5511 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5512 ) 5513 5514 def _parse_predict(self) -> exp.Predict: 5515 self._match_text_seq("MODEL") 5516 this = self._parse_table() 5517 5518 self._match(TokenType.COMMA) 5519 self._match_text_seq("TABLE") 5520 5521 return self.expression( 5522 exp.Predict, 5523 this=this, 5524 expression=self._parse_table(), 5525 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5526 ) 5527 5528 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5529 args = self._parse_csv(self._parse_table) 5530 return exp.JoinHint(this=func_name.upper(), expressions=args) 5531 5532 def _parse_substring(self) -> exp.Substring: 5533 # Postgres supports the form: substring(string [from int] [for int]) 5534 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5535 5536 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5537 5538 if self._match(TokenType.FROM): 5539 args.append(self._parse_bitwise()) 5540 if self._match(TokenType.FOR): 5541 if len(args) == 1: 5542 
args.append(exp.Literal.number(1)) 5543 args.append(self._parse_bitwise()) 5544 5545 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5546 5547 def _parse_trim(self) -> exp.Trim: 5548 # https://www.w3resource.com/sql/character-functions/trim.php 5549 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5550 5551 position = None 5552 collation = None 5553 expression = None 5554 5555 if self._match_texts(self.TRIM_TYPES): 5556 position = self._prev.text.upper() 5557 5558 this = self._parse_bitwise() 5559 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5560 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5561 expression = self._parse_bitwise() 5562 5563 if invert_order: 5564 this, expression = expression, this 5565 5566 if self._match(TokenType.COLLATE): 5567 collation = self._parse_bitwise() 5568 5569 return self.expression( 5570 exp.Trim, this=this, position=position, expression=expression, collation=collation 5571 ) 5572 5573 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5574 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5575 5576 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5577 return self._parse_window(self._parse_id_var(), alias=True) 5578 5579 def _parse_respect_or_ignore_nulls( 5580 self, this: t.Optional[exp.Expression] 5581 ) -> t.Optional[exp.Expression]: 5582 if self._match_text_seq("IGNORE", "NULLS"): 5583 return self.expression(exp.IgnoreNulls, this=this) 5584 if self._match_text_seq("RESPECT", "NULLS"): 5585 return self.expression(exp.RespectNulls, this=this) 5586 return this 5587 5588 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5589 if self._match(TokenType.HAVING): 5590 self._match_texts(("MAX", "MIN")) 5591 max = self._prev.text.upper() != "MIN" 5592 return self.expression( 5593 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5594 ) 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may follow a function call: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...), producing an exp.Window when present.

        Args:
            this: the (function) expression the window applies to.
            alias: when True, parse a named window (`name AS (...)`) instead of OVER.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Pull the IGNORE/RESPECT NULLS wrapper out of the aggregate's
                # argument and re-wrap the whole aggregate with it instead
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like token follows: this is not a window expression
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # The comments now live on the Window node; drop them from the func
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER referencing a named window: OVER window_name
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE [BETWEEN] <bound> [AND <bound>] frame specification
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus its side
        (PRECEDING / FOLLOWING) when present."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias, or a parenthesized alias list, following `this`.

        Args:
            this: the expression being aliased.
            explicit: when True, only parse an alias if the AS keyword is present.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # `expr AS (a, b, ...)` -- multiple aliases
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
self._parse_identifier() 5764 if not expression and ( 5765 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5766 ): 5767 quoted = self._prev.token_type == TokenType.STRING 5768 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5769 5770 return expression 5771 5772 def _parse_string(self) -> t.Optional[exp.Expression]: 5773 if self._match_set(self.STRING_PARSERS): 5774 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5775 return self._parse_placeholder() 5776 5777 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5778 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5779 5780 def _parse_number(self) -> t.Optional[exp.Expression]: 5781 if self._match_set(self.NUMERIC_PARSERS): 5782 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5783 return self._parse_placeholder() 5784 5785 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5786 if self._match(TokenType.IDENTIFIER): 5787 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5788 return self._parse_placeholder() 5789 5790 def _parse_var( 5791 self, 5792 any_token: bool = False, 5793 tokens: t.Optional[t.Collection[TokenType]] = None, 5794 upper: bool = False, 5795 ) -> t.Optional[exp.Expression]: 5796 if ( 5797 (any_token and self._advance_any()) 5798 or self._match(TokenType.VAR) 5799 or (self._match_set(tokens) if tokens else False) 5800 ): 5801 return self.expression( 5802 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5803 ) 5804 return self._parse_placeholder() 5805 5806 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5807 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5808 self._advance() 5809 return self._prev 5810 return None 5811 5812 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5813 return self._parse_var() 
or self._parse_string() 5814 5815 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5816 return self._parse_primary() or self._parse_var(any_token=True) 5817 5818 def _parse_null(self) -> t.Optional[exp.Expression]: 5819 if self._match_set(self.NULL_TOKENS): 5820 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5821 return self._parse_placeholder() 5822 5823 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5824 if self._match(TokenType.TRUE): 5825 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5826 if self._match(TokenType.FALSE): 5827 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5828 return self._parse_placeholder() 5829 5830 def _parse_star(self) -> t.Optional[exp.Expression]: 5831 if self._match(TokenType.STAR): 5832 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5833 return self._parse_placeholder() 5834 5835 def _parse_parameter(self) -> exp.Parameter: 5836 this = self._parse_identifier() or self._parse_primary_or_var() 5837 return self.expression(exp.Parameter, this=this) 5838 5839 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5840 if self._match_set(self.PLACEHOLDER_PARSERS): 5841 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5842 if placeholder: 5843 return placeholder 5844 self._advance(-1) 5845 return None 5846 5847 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5848 if not self._match_texts(keywords): 5849 return None 5850 if self._match(TokenType.L_PAREN, advance=False): 5851 return self._parse_wrapped_csv(self._parse_expression) 5852 5853 expression = self._parse_expression() 5854 return [expression] if expression else None 5855 5856 def _parse_csv( 5857 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5858 ) -> t.List[exp.Expression]: 5859 parse_result = parse_method() 5860 items = [parse_result] if parse_result is not None else [] 5861 5862 while self._match(sep): 5863 
self._add_comments(parse_result) 5864 parse_result = parse_method() 5865 if parse_result is not None: 5866 items.append(parse_result) 5867 5868 return items 5869 5870 def _parse_tokens( 5871 self, parse_method: t.Callable, expressions: t.Dict 5872 ) -> t.Optional[exp.Expression]: 5873 this = parse_method() 5874 5875 while self._match_set(expressions): 5876 this = self.expression( 5877 expressions[self._prev.token_type], 5878 this=this, 5879 comments=self._prev_comments, 5880 expression=parse_method(), 5881 ) 5882 5883 return this 5884 5885 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5886 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5887 5888 def _parse_wrapped_csv( 5889 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5890 ) -> t.List[exp.Expression]: 5891 return self._parse_wrapped( 5892 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5893 ) 5894 5895 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5896 wrapped = self._match(TokenType.L_PAREN) 5897 if not wrapped and not optional: 5898 self.raise_error("Expecting (") 5899 parse_result = parse_method() 5900 if wrapped: 5901 self._match_r_paren() 5902 return parse_result 5903 5904 def _parse_expressions(self) -> t.List[exp.Expression]: 5905 return self._parse_csv(self._parse_expression) 5906 5907 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5908 return self._parse_select() or self._parse_set_operations( 5909 self._parse_expression() if alias else self._parse_conjunction() 5910 ) 5911 5912 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5913 return self._parse_query_modifiers( 5914 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5915 ) 5916 5917 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5918 this = None 5919 if 
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] x] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK keyword itself was consumed by the caller (_prev).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND CHAIN -> chain=True, AND NO CHAIN -> chain=False
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string-or-table>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <field def> action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE ... DROP action, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP [IF EXISTS] PARTITION ..."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of an ALTER TABLE ... ADD statement (constraints or columns)."""
        # _index - 1 points back at the ADD token consumed by the dispatcher
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            # Dialects where a single ADD covers all columns: ADD (col1, col2, ...)
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER action, dispatching through ALTER_ALTER_PARSERS."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Remaining form: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse ALTER ... DISTSTYLE {ALL | EVEN | AUTO | KEY DISTKEY <col>}."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse ALTER ... [COMPOUND] SORTKEY {(col, ...) | AUTO | NONE}."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP: either partitions or columns."""
        # _index - 1 points back at the DROP token consumed by the dispatcher
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME {COLUMN [IF EXISTS] old TO new | TO new_table}."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ...

        Each branch stores its result on a different arg of the AlterSet node
        (expressions, option, location, file_format, tag, ...).
        """
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_conjunction()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_conjunction))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; anything unrecognized is preserved as a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only accept the parse when all tokens were consumed
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)
    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ... THEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND cond] THEN ... clauses."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None when absent
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (vals)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW_PARSERS trie, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not a valid assignment: rewind so the caller can try something else
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )
global_}, # type: ignore 6256 ) 6257 6258 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6259 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6260 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6261 6262 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6263 index = self._index 6264 set_ = self.expression( 6265 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6266 ) 6267 6268 if self._curr: 6269 self._retreat(index) 6270 return self._parse_as_command(self._prev) 6271 6272 return set_ 6273 6274 def _parse_var_from_options( 6275 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6276 ) -> t.Optional[exp.Var]: 6277 start = self._curr 6278 if not start: 6279 return None 6280 6281 option = start.text.upper() 6282 continuations = options.get(option) 6283 6284 index = self._index 6285 self._advance() 6286 for keywords in continuations or []: 6287 if isinstance(keywords, str): 6288 keywords = (keywords,) 6289 6290 if self._match_text_seq(*keywords): 6291 option = f"{option} {' '.join(keywords)}" 6292 break 6293 else: 6294 if continuations or continuations is None: 6295 if raise_unmatched: 6296 self.raise_error(f"Unknown option {option}") 6297 6298 self._retreat(index) 6299 return None 6300 6301 return exp.var(option) 6302 6303 def _parse_as_command(self, start: Token) -> exp.Command: 6304 while self._curr: 6305 self._advance() 6306 text = self._find_sql(start, self._prev) 6307 size = len(start.text) 6308 self._warn_unsupported() 6309 return exp.Command(this=text[:size], expression=text[size:]) 6310 6311 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6312 settings = [] 6313 6314 self._match_l_paren() 6315 kind = self._parse_id_var() 6316 6317 if self._match(TokenType.L_PAREN): 6318 while True: 6319 key = self._parse_id_var() 6320 value = self._parse_primary() 6321 6322 if not key and value is None: 6323 break 6324 
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parses ``RANGE(MIN <x> MAX <y>)`` or ``RANGE(<y>)``; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parses a ``<expr> FOR <x> IN <iter> [IF <cond>]`` style comprehension."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind (including the token consumed before entry) — not a comprehension.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parses dollar-quoted (heredoc) strings such as ``$tag$ ... $tag$``."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # A named tag like $tag$ must be closed by another "$" with no whitespace.
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walks `trie` over the upcoming tokens to locate a matching parser callback."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No parser matched — rewind to where we started.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True when the current token matches `token_type`, else None.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in the collection `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Matches two consecutive token types; advances past both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression:
                       t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive membership match of the current token's text.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Matches a sequence of token texts; fully rewinds on any mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            # Peek-only mode: report the match but restore the position.
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrites columns in `node` that reference lambda parameters, adding casts
        for parameters that carry a declared type."""
        if not node:
            return node

        # Map parameter name -> declared type (or False when untyped).
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain containing the column, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parses TRUNCATE [TABLE | DATABASE] statements, falling back to a raw
        Command when unparsed tokens remain."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses an optional ``= ( opt, opt, ... )`` option list."""
        opts = []
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)
        while self._curr and not self._match(TokenType.R_PAREN):
            opts.append(self._parse_conjunction())
            self._match(TokenType.COMMA)
        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parses the parameter list of a COPY statement into CopyParameter nodes."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_unquoted_field()
            value = None

            # Some options are defined as functions with the values as params
            if not isinstance(option, exp.Func):
                prev = self._prev.text.upper()
                # Different dialects might separate options and values by white space, "=" and "AS"
                self._match(TokenType.EQ)
                self._match(TokenType.ALIAS)

                if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN):
                    # Snowflake FILE_FORMAT case
                    value = self._parse_wrapped_options()
                else:
                    value = self._parse_unquoted_field()

            param = self.expression(exp.CopyParameter, this=option, expression=value)
            options.append(param)

            if sep:
                self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parses COPY credential clauses (storage integration, credentials,
        encryption, IAM role, region) into a Credentials node."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", advance=False):
            expr.set("storage", self._parse_conjunction())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parses COPY [INTO] statements, falling back to a raw Command when
        unparsed tokens remain."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_conjunction()
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is truthy for COPY ... FROM, falsy for COPY ... TO.
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The number of characters of query context to capture and display alongside an error message. Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1199 def __init__( 1200 self, 1201 error_level: t.Optional[ErrorLevel] = None, 1202 error_message_context: int = 100, 1203 max_errors: int = 3, 1204 dialect: DialectType = None, 1205 ): 1206 from sqlglot.dialects import Dialect 1207 1208 self.error_level = error_level or ErrorLevel.IMMEDIATE 1209 self.error_message_context = error_message_context 1210 self.max_errors = max_errors 1211 self.dialect = Dialect.get_or_raise(dialect) 1212 self.reset()
1224 def parse( 1225 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1226 ) -> t.List[t.Optional[exp.Expression]]: 1227 """ 1228 Parses a list of tokens and returns a list of syntax trees, one tree 1229 per parsed SQL statement. 1230 1231 Args: 1232 raw_tokens: The list of tokens. 1233 sql: The original SQL string, used to produce helpful debug messages. 1234 1235 Returns: 1236 The list of the produced syntax trees. 1237 """ 1238 return self._parse( 1239 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1240 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1242 def parse_into( 1243 self, 1244 expression_types: exp.IntoType, 1245 raw_tokens: t.List[Token], 1246 sql: t.Optional[str] = None, 1247 ) -> t.List[t.Optional[exp.Expression]]: 1248 """ 1249 Parses a list of tokens into a given Expression type. If a collection of Expression 1250 types is given instead, this method will try to parse the token list into each one 1251 of them, stopping at the first for which the parsing succeeds. 1252 1253 Args: 1254 expression_types: The expression type(s) to try and parse the token list into. 1255 raw_tokens: The list of tokens. 1256 sql: The original SQL string, used to produce helpful debug messages. 1257 1258 Returns: 1259 The target Expression. 1260 """ 1261 errors = [] 1262 for expression_type in ensure_list(expression_types): 1263 parser = self.EXPRESSION_PARSERS.get(expression_type) 1264 if not parser: 1265 raise TypeError(f"No parser registered for {expression_type}") 1266 1267 try: 1268 return self._parse(parser, raw_tokens, sql) 1269 except ParseError as e: 1270 e.errors[0]["into_expression"] = expression_type 1271 errors.append(e) 1272 1273 raise ParseError( 1274 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1275 errors=merge_errors(errors), 1276 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1316 def check_errors(self) -> None: 1317 """Logs or raises any found errors, depending on the chosen error level setting.""" 1318 if self.error_level == ErrorLevel.WARN: 1319 for error in self.errors: 1320 logger.error(str(error)) 1321 elif self.error_level == ErrorLevel.RAISE and self.errors: 1322 raise ParseError( 1323 concat_messages(self.errors, self.max_errors), 1324 errors=merge_errors(self.errors), 1325 )
Logs or raises any found errors, depending on the chosen error level setting.
1327 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1328 """ 1329 Appends an error in the list of recorded errors or raises it, depending on the chosen 1330 error level setting. 1331 """ 1332 token = token or self._curr or self._prev or Token.string("") 1333 start = token.start 1334 end = token.end + 1 1335 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1336 highlight = self.sql[start:end] 1337 end_context = self.sql[end : end + self.error_message_context] 1338 1339 error = ParseError.new( 1340 f"{message}. Line {token.line}, Col: {token.col}.\n" 1341 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1342 description=message, 1343 line=token.line, 1344 col=token.col, 1345 start_context=start_context, 1346 highlight=highlight, 1347 end_context=end_context, 1348 ) 1349 1350 if self.error_level == ErrorLevel.IMMEDIATE: 1351 raise error 1352 1353 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1355 def expression( 1356 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1357 ) -> E: 1358 """ 1359 Creates a new, validated Expression. 1360 1361 Args: 1362 exp_class: The expression class to instantiate. 1363 comments: An optional list of comments to attach to the expression. 1364 kwargs: The arguments to set for the expression along with their respective values. 1365 1366 Returns: 1367 The target expression. 1368 """ 1369 instance = exp_class(**kwargs) 1370 instance.add_comments(comments) if comments else self._add_comments(instance) 1371 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1378 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1379 """ 1380 Validates an Expression, making sure that all its mandatory arguments are set. 1381 1382 Args: 1383 expression: The expression to validate. 1384 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1385 1386 Returns: 1387 The validated expression. 1388 """ 1389 if self.error_level != ErrorLevel.IGNORE: 1390 for error_message in expression.error_messages(args): 1391 self.raise_error(error_message) 1392 1393 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.