# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.array(*keys, copy=False), 33 values=exp.array(*values, copy=False), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, 
Dialect], E]: 64 def _parser(args: t.List, dialect: Dialect) -> E: 65 expression = expr_type( 66 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 67 ) 68 if len(args) > 2 and expr_type is exp.JSONExtract: 69 expression.set("expressions", args[2:]) 70 71 return expression 72 73 return _parser 74 75 76class _Parser(type): 77 def __new__(cls, clsname, bases, attrs): 78 klass = super().__new__(cls, clsname, bases, attrs) 79 80 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 81 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 82 83 return klass 84 85 86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: Determines the amount of context to capture from a 94 query string when displaying the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 
98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": parse_like, 122 "LOG": parse_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": parse_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 
AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 
TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 } 296 297 CREATABLES = { 298 TokenType.COLUMN, 299 TokenType.CONSTRAINT, 300 TokenType.FUNCTION, 301 TokenType.INDEX, 302 TokenType.PROCEDURE, 303 TokenType.FOREIGN_KEY, 304 *DB_CREATABLES, 305 } 306 307 # Tokens that can represent identifiers 308 ID_VAR_TOKENS = { 309 TokenType.VAR, 310 TokenType.ANTI, 311 TokenType.APPLY, 312 TokenType.ASC, 313 TokenType.AUTO_INCREMENT, 314 TokenType.BEGIN, 315 TokenType.BPCHAR, 316 TokenType.CACHE, 317 TokenType.CASE, 318 TokenType.COLLATE, 319 TokenType.COMMAND, 320 TokenType.COMMENT, 321 TokenType.COMMIT, 322 TokenType.CONSTRAINT, 323 TokenType.DEFAULT, 324 TokenType.DELETE, 325 TokenType.DESC, 326 TokenType.DESCRIBE, 327 TokenType.DICTIONARY, 328 TokenType.DIV, 329 TokenType.END, 330 TokenType.EXECUTE, 331 TokenType.ESCAPE, 332 TokenType.FALSE, 333 TokenType.FIRST, 334 TokenType.FILTER, 335 TokenType.FINAL, 336 
TokenType.FORMAT, 337 TokenType.FULL, 338 TokenType.IS, 339 TokenType.ISNULL, 340 TokenType.INTERVAL, 341 TokenType.KEEP, 342 TokenType.KILL, 343 TokenType.LEFT, 344 TokenType.LOAD, 345 TokenType.MERGE, 346 TokenType.NATURAL, 347 TokenType.NEXT, 348 TokenType.OFFSET, 349 TokenType.OPERATOR, 350 TokenType.ORDINALITY, 351 TokenType.OVERLAPS, 352 TokenType.OVERWRITE, 353 TokenType.PARTITION, 354 TokenType.PERCENT, 355 TokenType.PIVOT, 356 TokenType.PRAGMA, 357 TokenType.RANGE, 358 TokenType.RECURSIVE, 359 TokenType.REFERENCES, 360 TokenType.REFRESH, 361 TokenType.REPLACE, 362 TokenType.RIGHT, 363 TokenType.ROW, 364 TokenType.ROWS, 365 TokenType.SEMI, 366 TokenType.SET, 367 TokenType.SETTINGS, 368 TokenType.SHOW, 369 TokenType.TEMPORARY, 370 TokenType.TOP, 371 TokenType.TRUE, 372 TokenType.UNIQUE, 373 TokenType.UNPIVOT, 374 TokenType.UPDATE, 375 TokenType.USE, 376 TokenType.VOLATILE, 377 TokenType.WINDOW, 378 *CREATABLES, 379 *SUBQUERY_PREDICATES, 380 *TYPE_TOKENS, 381 *NO_PAREN_FUNCTIONS, 382 } 383 384 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 385 386 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 387 TokenType.ANTI, 388 TokenType.APPLY, 389 TokenType.ASOF, 390 TokenType.FULL, 391 TokenType.LEFT, 392 TokenType.LOCK, 393 TokenType.NATURAL, 394 TokenType.OFFSET, 395 TokenType.RIGHT, 396 TokenType.SEMI, 397 TokenType.WINDOW, 398 } 399 400 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 401 402 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 403 404 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 405 406 FUNC_TOKENS = { 407 TokenType.COLLATE, 408 TokenType.COMMAND, 409 TokenType.CURRENT_DATE, 410 TokenType.CURRENT_DATETIME, 411 TokenType.CURRENT_TIMESTAMP, 412 TokenType.CURRENT_TIME, 413 TokenType.CURRENT_USER, 414 TokenType.FILTER, 415 TokenType.FIRST, 416 TokenType.FORMAT, 417 TokenType.GLOB, 418 TokenType.IDENTIFIER, 419 TokenType.INDEX, 420 TokenType.ISNULL, 421 TokenType.ILIKE, 422 TokenType.INSERT, 423 TokenType.LIKE, 424 TokenType.MERGE, 425 
TokenType.OFFSET, 426 TokenType.PRIMARY_KEY, 427 TokenType.RANGE, 428 TokenType.REPLACE, 429 TokenType.RLIKE, 430 TokenType.ROW, 431 TokenType.UNNEST, 432 TokenType.VAR, 433 TokenType.LEFT, 434 TokenType.RIGHT, 435 TokenType.DATE, 436 TokenType.DATETIME, 437 TokenType.TABLE, 438 TokenType.TIMESTAMP, 439 TokenType.TIMESTAMPTZ, 440 TokenType.WINDOW, 441 TokenType.XOR, 442 *TYPE_TOKENS, 443 *SUBQUERY_PREDICATES, 444 } 445 446 CONJUNCTION = { 447 TokenType.AND: exp.And, 448 TokenType.OR: exp.Or, 449 } 450 451 EQUALITY = { 452 TokenType.COLON_EQ: exp.PropertyEQ, 453 TokenType.EQ: exp.EQ, 454 TokenType.NEQ: exp.NEQ, 455 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 456 } 457 458 COMPARISON = { 459 TokenType.GT: exp.GT, 460 TokenType.GTE: exp.GTE, 461 TokenType.LT: exp.LT, 462 TokenType.LTE: exp.LTE, 463 } 464 465 BITWISE = { 466 TokenType.AMP: exp.BitwiseAnd, 467 TokenType.CARET: exp.BitwiseXor, 468 TokenType.PIPE: exp.BitwiseOr, 469 } 470 471 TERM = { 472 TokenType.DASH: exp.Sub, 473 TokenType.PLUS: exp.Add, 474 TokenType.MOD: exp.Mod, 475 TokenType.COLLATE: exp.Collate, 476 } 477 478 FACTOR = { 479 TokenType.DIV: exp.IntDiv, 480 TokenType.LR_ARROW: exp.Distance, 481 TokenType.SLASH: exp.Div, 482 TokenType.STAR: exp.Mul, 483 } 484 485 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 486 487 TIMES = { 488 TokenType.TIME, 489 TokenType.TIMETZ, 490 } 491 492 TIMESTAMPS = { 493 TokenType.TIMESTAMP, 494 TokenType.TIMESTAMPTZ, 495 TokenType.TIMESTAMPLTZ, 496 *TIMES, 497 } 498 499 SET_OPERATIONS = { 500 TokenType.UNION, 501 TokenType.INTERSECT, 502 TokenType.EXCEPT, 503 } 504 505 JOIN_METHODS = { 506 TokenType.NATURAL, 507 TokenType.ASOF, 508 } 509 510 JOIN_SIDES = { 511 TokenType.LEFT, 512 TokenType.RIGHT, 513 TokenType.FULL, 514 } 515 516 JOIN_KINDS = { 517 TokenType.INNER, 518 TokenType.OUTER, 519 TokenType.CROSS, 520 TokenType.SEMI, 521 TokenType.ANTI, 522 } 523 524 JOIN_HINTS: t.Set[str] = set() 525 526 LAMBDAS = { 527 TokenType.ARROW: lambda self, expressions: 
self.expression( 528 exp.Lambda, 529 this=self._replace_lambda( 530 self._parse_conjunction(), 531 {node.name for node in expressions}, 532 ), 533 expressions=expressions, 534 ), 535 TokenType.FARROW: lambda self, expressions: self.expression( 536 exp.Kwarg, 537 this=exp.var(expressions[0].name), 538 expression=self._parse_conjunction(), 539 ), 540 } 541 542 COLUMN_OPERATORS = { 543 TokenType.DOT: None, 544 TokenType.DCOLON: lambda self, this, to: self.expression( 545 exp.Cast if self.STRICT_CAST else exp.TryCast, 546 this=this, 547 to=to, 548 ), 549 TokenType.ARROW: lambda self, this, path: self.expression( 550 exp.JSONExtract, 551 this=this, 552 expression=self.dialect.to_json_path(path), 553 ), 554 TokenType.DARROW: lambda self, this, path: self.expression( 555 exp.JSONExtractScalar, 556 this=this, 557 expression=self.dialect.to_json_path(path), 558 ), 559 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 560 exp.JSONBExtract, 561 this=this, 562 expression=path, 563 ), 564 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 565 exp.JSONBExtractScalar, 566 this=this, 567 expression=path, 568 ), 569 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 570 exp.JSONBContains, 571 this=this, 572 expression=key, 573 ), 574 } 575 576 EXPRESSION_PARSERS = { 577 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 578 exp.Column: lambda self: self._parse_column(), 579 exp.Condition: lambda self: self._parse_conjunction(), 580 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 581 exp.Expression: lambda self: self._parse_statement(), 582 exp.From: lambda self: self._parse_from(), 583 exp.Group: lambda self: self._parse_group(), 584 exp.Having: lambda self: self._parse_having(), 585 exp.Identifier: lambda self: self._parse_id_var(), 586 exp.Join: lambda self: self._parse_join(), 587 exp.Lambda: lambda self: self._parse_lambda(), 588 exp.Lateral: lambda self: self._parse_lateral(), 589 
exp.Limit: lambda self: self._parse_limit(), 590 exp.Offset: lambda self: self._parse_offset(), 591 exp.Order: lambda self: self._parse_order(), 592 exp.Ordered: lambda self: self._parse_ordered(), 593 exp.Properties: lambda self: self._parse_properties(), 594 exp.Qualify: lambda self: self._parse_qualify(), 595 exp.Returning: lambda self: self._parse_returning(), 596 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 597 exp.Table: lambda self: self._parse_table_parts(), 598 exp.TableAlias: lambda self: self._parse_table_alias(), 599 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 600 exp.Where: lambda self: self._parse_where(), 601 exp.Window: lambda self: self._parse_named_window(), 602 exp.With: lambda self: self._parse_with(), 603 "JOIN_TYPE": lambda self: self._parse_join_parts(), 604 } 605 606 STATEMENT_PARSERS = { 607 TokenType.ALTER: lambda self: self._parse_alter(), 608 TokenType.BEGIN: lambda self: self._parse_transaction(), 609 TokenType.CACHE: lambda self: self._parse_cache(), 610 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 611 TokenType.COMMENT: lambda self: self._parse_comment(), 612 TokenType.CREATE: lambda self: self._parse_create(), 613 TokenType.DELETE: lambda self: self._parse_delete(), 614 TokenType.DESC: lambda self: self._parse_describe(), 615 TokenType.DESCRIBE: lambda self: self._parse_describe(), 616 TokenType.DROP: lambda self: self._parse_drop(), 617 TokenType.INSERT: lambda self: self._parse_insert(), 618 TokenType.KILL: lambda self: self._parse_kill(), 619 TokenType.LOAD: lambda self: self._parse_load(), 620 TokenType.MERGE: lambda self: self._parse_merge(), 621 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 622 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 623 TokenType.REFRESH: lambda self: self._parse_refresh(), 624 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 625 TokenType.SET: lambda self: 
self._parse_set(), 626 TokenType.UNCACHE: lambda self: self._parse_uncache(), 627 TokenType.UPDATE: lambda self: self._parse_update(), 628 TokenType.USE: lambda self: self.expression( 629 exp.Use, 630 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 631 and exp.var(self._prev.text), 632 this=self._parse_table(schema=False), 633 ), 634 } 635 636 UNARY_PARSERS = { 637 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 638 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 639 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 640 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 641 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 642 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 643 } 644 645 PRIMARY_PARSERS = { 646 TokenType.STRING: lambda self, token: self.expression( 647 exp.Literal, this=token.text, is_string=True 648 ), 649 TokenType.NUMBER: lambda self, token: self.expression( 650 exp.Literal, this=token.text, is_string=False 651 ), 652 TokenType.STAR: lambda self, _: self.expression( 653 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 654 ), 655 TokenType.NULL: lambda self, _: self.expression(exp.Null), 656 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 657 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 658 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 659 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 660 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 661 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 662 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 663 exp.National, 
this=token.text 664 ), 665 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 666 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 667 exp.RawString, this=token.text 668 ), 669 TokenType.UNICODE_STRING: lambda self, token: self.expression( 670 exp.UnicodeString, 671 this=token.text, 672 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 673 ), 674 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 675 } 676 677 PLACEHOLDER_PARSERS = { 678 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 679 TokenType.PARAMETER: lambda self: self._parse_parameter(), 680 TokenType.COLON: lambda self: ( 681 self.expression(exp.Placeholder, this=self._prev.text) 682 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 683 else None 684 ), 685 } 686 687 RANGE_PARSERS = { 688 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 689 TokenType.GLOB: binary_range_parser(exp.Glob), 690 TokenType.ILIKE: binary_range_parser(exp.ILike), 691 TokenType.IN: lambda self, this: self._parse_in(this), 692 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 693 TokenType.IS: lambda self, this: self._parse_is(this), 694 TokenType.LIKE: binary_range_parser(exp.Like), 695 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 696 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 697 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 698 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 699 } 700 701 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 702 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 703 "AUTO": lambda self: self._parse_auto_property(), 704 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 705 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 706 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 707 
"CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 708 "CHECKSUM": lambda self: self._parse_checksum(), 709 "CLUSTER BY": lambda self: self._parse_cluster(), 710 "CLUSTERED": lambda self: self._parse_clustered_by(), 711 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 712 exp.CollateProperty, **kwargs 713 ), 714 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 715 "CONTAINS": lambda self: self._parse_contains_property(), 716 "COPY": lambda self: self._parse_copy_property(), 717 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 718 "DEFINER": lambda self: self._parse_definer(), 719 "DETERMINISTIC": lambda self: self.expression( 720 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 721 ), 722 "DISTKEY": lambda self: self._parse_distkey(), 723 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 724 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 725 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 726 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 727 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 728 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 729 "FREESPACE": lambda self: self._parse_freespace(), 730 "HEAP": lambda self: self.expression(exp.HeapProperty), 731 "IMMUTABLE": lambda self: self.expression( 732 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 733 ), 734 "INHERITS": lambda self: self.expression( 735 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 736 ), 737 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 738 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 739 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 740 "LAYOUT": lambda self: 
self._parse_dict_property(this="LAYOUT"), 741 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 742 "LIKE": lambda self: self._parse_create_like(), 743 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 744 "LOCK": lambda self: self._parse_locking(), 745 "LOCKING": lambda self: self._parse_locking(), 746 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 747 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 748 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 749 "MODIFIES": lambda self: self._parse_modifies_property(), 750 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 751 "NO": lambda self: self._parse_no_property(), 752 "ON": lambda self: self._parse_on_property(), 753 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 754 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 755 "PARTITION": lambda self: self._parse_partitioned_of(), 756 "PARTITION BY": lambda self: self._parse_partitioned_by(), 757 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 758 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 759 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 760 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 761 "READS": lambda self: self._parse_reads_property(), 762 "REMOTE": lambda self: self._parse_remote_with_connection(), 763 "RETURNS": lambda self: self._parse_returns(), 764 "ROW": lambda self: self._parse_row(), 765 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 766 "SAMPLE": lambda self: self.expression( 767 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 768 ), 769 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 770 "SETTINGS": lambda self: self.expression( 771 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 772 ), 
773 "SORTKEY": lambda self: self._parse_sortkey(), 774 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 775 "STABLE": lambda self: self.expression( 776 exp.StabilityProperty, this=exp.Literal.string("STABLE") 777 ), 778 "STORED": lambda self: self._parse_stored(), 779 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 780 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 781 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 782 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 783 "TO": lambda self: self._parse_to_table(), 784 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 785 "TRANSFORM": lambda self: self.expression( 786 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 787 ), 788 "TTL": lambda self: self._parse_ttl(), 789 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 790 "VOLATILE": lambda self: self._parse_volatile_property(), 791 "WITH": lambda self: self._parse_with_property(), 792 } 793 794 CONSTRAINT_PARSERS = { 795 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 796 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 797 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 798 "CHARACTER SET": lambda self: self.expression( 799 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 800 ), 801 "CHECK": lambda self: self.expression( 802 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 803 ), 804 "COLLATE": lambda self: self.expression( 805 exp.CollateColumnConstraint, this=self._parse_var() 806 ), 807 "COMMENT": lambda self: self.expression( 808 exp.CommentColumnConstraint, this=self._parse_string() 809 ), 810 "COMPRESS": lambda self: self._parse_compress(), 811 "CLUSTERED": lambda self: self.expression( 812 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 813 ), 814 "NONCLUSTERED": lambda self: self.expression( 815 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 816 ), 817 "DEFAULT": lambda self: self.expression( 818 exp.DefaultColumnConstraint, this=self._parse_bitwise() 819 ), 820 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 821 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 822 "FORMAT": lambda self: self.expression( 823 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 824 ), 825 "GENERATED": lambda self: self._parse_generated_as_identity(), 826 "IDENTITY": lambda self: self._parse_auto_increment(), 827 "INLINE": lambda self: self._parse_inline(), 828 "LIKE": lambda self: self._parse_create_like(), 829 "NOT": lambda self: self._parse_not_constraint(), 830 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 831 "ON": lambda self: ( 832 self._match(TokenType.UPDATE) 833 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 834 ) 835 or self.expression(exp.OnProperty, this=self._parse_id_var()), 836 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 837 "PERIOD": lambda self: self._parse_period_for_system_time(), 838 "PRIMARY KEY": lambda self: self._parse_primary_key(), 839 "REFERENCES": lambda self: self._parse_references(match=False), 840 "TITLE": lambda self: self.expression( 841 exp.TitleColumnConstraint, this=self._parse_var_or_string() 842 ), 843 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 844 "UNIQUE": lambda self: self._parse_unique(), 845 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 846 "WITH": lambda self: self.expression( 847 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 848 ), 849 } 850 851 ALTER_PARSERS = { 852 "ADD": lambda self: 
self._parse_alter_table_add(), 853 "ALTER": lambda self: self._parse_alter_table_alter(), 854 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 855 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 856 "DROP": lambda self: self._parse_alter_table_drop(), 857 "RENAME": lambda self: self._parse_alter_table_rename(), 858 } 859 860 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 861 862 NO_PAREN_FUNCTION_PARSERS = { 863 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 864 "CASE": lambda self: self._parse_case(), 865 "IF": lambda self: self._parse_if(), 866 "NEXT": lambda self: self._parse_next_value_for(), 867 } 868 869 INVALID_FUNC_NAME_TOKENS = { 870 TokenType.IDENTIFIER, 871 TokenType.STRING, 872 } 873 874 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 875 876 FUNCTION_PARSERS = { 877 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 878 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 879 "DECODE": lambda self: self._parse_decode(), 880 "EXTRACT": lambda self: self._parse_extract(), 881 "JSON_OBJECT": lambda self: self._parse_json_object(), 882 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 883 "JSON_TABLE": lambda self: self._parse_json_table(), 884 "MATCH": lambda self: self._parse_match_against(), 885 "OPENJSON": lambda self: self._parse_open_json(), 886 "POSITION": lambda self: self._parse_position(), 887 "PREDICT": lambda self: self._parse_predict(), 888 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 889 "STRING_AGG": lambda self: self._parse_string_agg(), 890 "SUBSTRING": lambda self: self._parse_substring(), 891 "TRIM": lambda self: self._parse_trim(), 892 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 893 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 894 } 895 896 QUERY_MODIFIER_PARSERS = { 897 TokenType.MATCH_RECOGNIZE: lambda self: ("match", 
self._parse_match_recognize()), 898 TokenType.WHERE: lambda self: ("where", self._parse_where()), 899 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 900 TokenType.HAVING: lambda self: ("having", self._parse_having()), 901 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 902 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 903 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 904 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 905 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 906 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 907 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 908 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 909 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 910 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 911 TokenType.CLUSTER_BY: lambda self: ( 912 "cluster", 913 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 914 ), 915 TokenType.DISTRIBUTE_BY: lambda self: ( 916 "distribute", 917 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 918 ), 919 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 920 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 921 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 922 } 923 924 SET_PARSERS = { 925 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 926 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 927 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 928 "TRANSACTION": lambda self: self._parse_set_transaction(), 929 } 930 931 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 932 933 TYPE_LITERAL_PARSERS = { 934 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 935 } 936 937 MODIFIABLES 
# Expression types whose trailing query modifiers (LIMIT, ORDER BY, ...) may be parsed
MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

# Tokens that can start the SELECT part of a CREATE ... AS <select> statement
DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

# Tokens that may immediately precede a VOLATILE keyword (see _parse_volatile_property)
PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

# BEGIN <kind> TRANSACTION variants
TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
TRANSACTION_CHARACTERISTICS = {
    "ISOLATION LEVEL REPEATABLE READ",
    "ISOLATION LEVEL READ COMMITTED",
    "ISOLATION LEVEL READ UNCOMMITTED",
    "ISOLATION LEVEL SERIALIZABLE",
    "READ WRITE",
    "READ ONLY",
}

# Conflict-resolution keywords for INSERT OR <alternative>
INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether or not ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

# Whether or not the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False
# Whether or not the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False

# Whether or not string aliases are supported `SELECT COUNT(*) 'count'`
STRING_ALIASES = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_UNION = True
UNION_MODIFIERS = {"order", "limit", "offset"}

# parses no parenthesis if statements as commands
NO_PAREN_IF_COMMANDS = True

__slots__ = (
    "error_level",
    "error_message_context",
    "max_errors",
    "dialect",
    "sql",
    "errors",
    "_tokens",
    "_index",
    "_curr",
    "_next",
    "_prev",
    "_prev_comments",
)

# Autofilled
SHOW_TRIE: t.Dict = {}
SET_TRIE: t.Dict = {}

def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Initialize the parser.

    Args:
        error_level: How to react to parse errors; defaults to ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of SQL context shown in error messages.
        max_errors: Maximum number of error messages concatenated into a raised ParseError.
        dialect: The dialect (name or instance) to resolve via Dialect.get_or_raise.
    """
    # Imported locally to avoid a circular import at module load time
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()

def reset(self):
    """Clear all parsing state so the instance can parse a fresh token stream."""
    self.sql = ""
    self.errors = []
    self._tokens = []
    self._index = 0
    self._curr = None
    self._next = None
    self._prev = None
    self._prev_comments = None
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)

def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    failures = []

    for into_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(into_type)
        if not parser:
            raise TypeError(f"No parser registered for {into_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to before moving on
            e.errors[0]["into_expression"] = into_type
            failures.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(failures),
    ) from failures[-1]
def _parse(
    self,
    parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """Split the token stream on semicolons and parse each chunk with `parse_method`."""
    self.reset()
    self.sql = sql or ""

    last = len(raw_tokens) - 1
    chunks: t.List[t.List[Token]] = [[]]

    for position, token in enumerate(raw_tokens):
        if token.token_type != TokenType.SEMICOLON:
            chunks[-1].append(token)
        elif position < last:
            # Semicolons separate statements; a trailing semicolon opens no new chunk
            chunks.append([])

    expressions = []

    for chunk in chunks:
        self._index = -1
        self._tokens = chunk
        self._advance()

        expressions.append(parse_method(self))

        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

    return expressions

def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
        return

    if self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    begin = token.start
    finish = token.end + 1

    context_before = self.sql[max(begin - self.error_message_context, 0) : begin]
    highlight = self.sql[begin:finish]
    context_after = self.sql[finish : finish + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {context_before}\033[4m{highlight}\033[0m{context_after}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=context_before,
        highlight=highlight,
        end_context=context_after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
1188 comments: An optional list of comments to attach to the expression. 1189 kwargs: The arguments to set for the expression along with their respective values. 1190 1191 Returns: 1192 The target expression. 1193 """ 1194 instance = exp_class(**kwargs) 1195 instance.add_comments(comments) if comments else self._add_comments(instance) 1196 return self.validate_expression(instance) 1197 1198 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1199 if expression and self._prev_comments: 1200 expression.add_comments(self._prev_comments) 1201 self._prev_comments = None 1202 1203 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1204 """ 1205 Validates an Expression, making sure that all its mandatory arguments are set. 1206 1207 Args: 1208 expression: The expression to validate. 1209 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1210 1211 Returns: 1212 The validated expression. 1213 """ 1214 if self.error_level != ErrorLevel.IGNORE: 1215 for error_message in expression.error_messages(args): 1216 self.raise_error(error_message) 1217 1218 return expression 1219 1220 def _find_sql(self, start: Token, end: Token) -> str: 1221 return self.sql[start.start : end.end + 1] 1222 1223 def _is_connected(self) -> bool: 1224 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1225 1226 def _advance(self, times: int = 1) -> None: 1227 self._index += times 1228 self._curr = seq_get(self._tokens, self._index) 1229 self._next = seq_get(self._tokens, self._index + 1) 1230 1231 if self._index > 0: 1232 self._prev = self._tokens[self._index - 1] 1233 self._prev_comments = self._prev.comments 1234 else: 1235 self._prev = None 1236 self._prev_comments = None 1237 1238 def _retreat(self, index: int) -> None: 1239 if index != self._index: 1240 self._advance(index - self._index) 1241 1242 def _warn_unsupported(self) -> None: 1243 if len(self._tokens) <= 1: 1244 return 
def _warn_unsupported(self) -> None:
    """Warn that the current chunk will be parsed as a generic Command."""
    if len(self._tokens) <= 1:
        return

    # We use _find_sql because self.sql may comprise multiple chunks, and we're only
    # interested in emitting a warning for the one being currently processed.
    snippet = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

    logger.warning(
        f"'{snippet}' contains unsupported syntax. Falling back to parsing as a 'Command'."
    )

def _parse_command(self) -> exp.Command:
    """Wrap the statement as an opaque Command node, emitting an unsupported-syntax warning."""
    self._warn_unsupported()
    keyword = self._prev.text.upper()
    return self.expression(exp.Command, this=keyword, expression=self._parse_string())

def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
    """Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
    start = self._prev
    exists = self._parse_exists() if allow_exists else None

    self._match(TokenType.ON)

    kind = self._match_set(self.CREATABLES) and self._prev
    if not kind:
        return self._parse_as_command(start)

    token_type = kind.token_type
    if token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        target = self._parse_user_defined_function(kind=token_type)
    elif token_type == TokenType.TABLE:
        target = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif token_type == TokenType.COLUMN:
        target = self._parse_column()
    else:
        target = self._parse_id_var()

    self._match(TokenType.IS)

    return self.expression(
        exp.Comment, this=target, kind=kind.text, expression=self._parse_string(), exists=exists
    )

def _parse_to_table(self) -> exp.ToTableProperty:
    """Parse a TO <table> property."""
    return self.expression(exp.ToTableProperty, this=self._parse_table_parts(schema=True))
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """Parse a ClickHouse MergeTree TTL clause: actions, WHERE, GROUP BY and SET aggregates."""

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        # A TTL expression optionally followed by an action keyword
        this = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
        if self._match_text_seq("RECOMPRESS"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
            )
        if self._match_text_seq("TO", "DISK"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
            )
        if self._match_text_seq("TO", "VOLUME"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
            )

        return this

    expressions = self._parse_csv(_parse_ttl_action)
    where = self._parse_where()
    group = self._parse_group()

    # SET aggregates are only valid after a GROUP BY
    aggregates = None
    if group and self._match(TokenType.SET):
        aggregates = self._parse_csv(self._parse_set_item)

    return self.expression(
        exp.MergeTreeTTL,
        expressions=expressions,
        where=where,
        group=group,
        aggregates=aggregates,
    )

def _parse_statement(self) -> t.Optional[exp.Expression]:
    """Parse one statement: dispatch table first, then raw commands, then plain expressions/selects."""
    if self._curr is None:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        return self.STATEMENT_PARSERS[self._prev.token_type](self)

    if self._match_set(Tokenizer.COMMANDS):
        return self._parse_command()

    expression = self._parse_expression()
    expression = self._parse_set_operations(expression) if expression else self._parse_select()
    return self._parse_query_modifiers(expression)

def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """Parse DROP ...; falls back to a generic Command if the dropped kind is unknown."""
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    kind = self._match_set(self.CREATABLES) and self._prev.text
    if not kind:
        return self._parse_as_command(start)

    # NOTE: keyword-argument order below matters — each _match* call consumes tokens,
    # so CASCADE / CONSTRAINTS / PURGE are matched in exactly this sequence.
    return self.expression(
        exp.Drop,
        comments=start.comments,
        exists=exists or self._parse_exists(),
        this=self._parse_table(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        ),
        kind=kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
    )
def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
    """
    Match `IF [NOT] EXISTS`.

    Returns a truthy value only when the whole sequence matched. The raw result of the
    `and` chain is returned deliberately — callers rely on its truthiness and the exact
    value flows into AST args, so it is not normalized to a strict bool here.
    """
    return (
        self._match_text_seq("IF")
        and (not not_ or self._match(TokenType.NOT))
        and self._match(TokenType.EXISTS)
    )
def _parse_create(self) -> exp.Create | exp.Command:
    """Parse a CREATE statement; falls back to a generic Command on unsupported syntax."""
    # Note: this can't be None because we've matched a statement parser
    start = self._prev
    comments = self._prev_comments

    replace = (
        start.token_type == TokenType.REPLACE
        or self._match_pair(TokenType.OR, TokenType.REPLACE)
        or self._match_pair(TokenType.OR, TokenType.ALTER)
    )
    unique = self._match(TokenType.UNIQUE)

    # CREATE TABLE FUNCTION: skip the TABLE keyword and treat as a function
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

    if not properties or not create_token:
        return self._parse_as_command(start)

    exists = self._parse_exists(not_=True)
    this = None
    expression: t.Optional[exp.Expression] = None
    indexes = None
    no_schema_binding = None
    begin = None
    end = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        # Merge newly parsed properties into the running `properties` container
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

        if not expression:
            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        this = self._parse_index(index=self._parse_id_var())
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(
            schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
        )

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        expression = self._parse_ddl_select()

        if create_token.token_type == TokenType.TABLE:
            # exp.Properties.Location.POST_EXPRESSION
            extend_props(self._parse_properties())

            indexes = []
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_INDEX
                extend_props(self._parse_properties())

                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

    shallow = self._match_text_seq("SHALLOW")

    if self._match_texts(self.CLONE_KEYWORDS):
        copy = self._prev.text.lower() == "copy"
        clone = self.expression(
            exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
        )

    # Leftover tokens mean the CREATE wasn't fully understood — degrade to a Command
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Create,
        comments=comments,
        this=this,
        kind=create_token.text.upper(),
        replace=replace,
        unique=unique,
        expression=expression,
        exists=exists,
        properties=properties,
        indexes=indexes,
        no_schema_binding=no_schema_binding,
        begin=begin,
        end=end,
        clone=clone,
    )

def _parse_property_before(self) -> t.Optional[exp.Expression]:
    """Parse a property that appears before the object name (pre-name grammar)."""
    # only used for teradata currently
    self._match(TokenType.COMMA)

    # Each flag below consumes tokens in order; falsy entries are filtered out before
    # being forwarded to the matched property parser.
    kwargs = {
        "no": self._match_text_seq("NO"),
        "dual": self._match_text_seq("DUAL"),
        "before": self._match_text_seq("BEFORE"),
        "default": self._match_text_seq("DEFAULT"),
        "local": (self._match_text_seq("LOCAL") and "LOCAL")
        or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
        "after": self._match_text_seq("AFTER"),
        "minimum": self._match_texts(("MIN", "MINIMUM")),
        "maximum": self._match_texts(("MAX", "MAXIMUM")),
    }

    if self._match_texts(self.PROPERTY_PARSERS):
        parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
        try:
            return parser(self, **{k: v for k, v in kwargs.items() if v})
        except TypeError:
            # The parser did not accept the collected keyword flags
            self.raise_error(f"Cannot parse property '{self._prev.text}'")

    return None
def _parse_property(self) -> t.Optional[exp.Expression]:
    """Parse a single property; returns None (with the cursor restored) when nothing matches."""
    if self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

    # Fall back to a generic `key = value` property
    index = self._index
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        # Not a key/value property: rewind so the tokens can be re-parsed elsewhere
        self._retreat(index)
        return None

    return self.expression(
        exp.Property,
        this=key.to_dot() if isinstance(key, exp.Column) else key,
        value=self._parse_column() or self._parse_var(any_token=True),
    )

def _parse_stored(self) -> exp.FileFormatProperty:
    """Parse STORED [AS] { INPUTFORMAT ... OUTPUTFORMAT ... | <format> }."""
    self._match(TokenType.ALIAS)

    input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
    output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

    return self.expression(
        exp.FileFormatProperty,
        this=(
            self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
        ),
    )

def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
    """Parse an optional `=` / `AS` followed by a field, into the given expression class."""
    self._match(TokenType.EQ)
    self._match(TokenType.ALIAS)
    return self.expression(exp_class, this=self._parse_field(), **kwargs)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parse zero or more consecutive properties into an exp.Properties container.

    Args:
        before: When truthy, use the pre-name property grammar
            (`_parse_property_before`, currently Teradata-specific).

    Returns:
        The collected properties, or None if no property was parsed.
    """
    properties: t.List[exp.Expression] = []

    while True:
        prop = self._parse_property_before() if before else self._parse_property()
        if not prop:
            break
        # A property parser may return either a single node or a list of nodes;
        # extend() with ensure_list replaces the manual append loop.
        properties.extend(ensure_list(prop))

    if properties:
        return self.expression(exp.Properties, expressions=properties)

    return None

def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
    """Parse [NO] FALLBACK [PROTECTION]."""
    return self.expression(
        exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
    )

def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
    """
    Disambiguate VOLATILE: directly after CREATE/REPLACE/UNIQUE it is a table
    property, otherwise it is treated as a function stability marker.
    """
    if self._index >= 2:
        pre_volatile_token = self._tokens[self._index - 2]
    else:
        pre_volatile_token = None

    if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
        return exp.VolatileProperty()

    return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
    """Parse SYSTEM_VERSIONING [= ON] [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]."""
    self._match_pair(TokenType.EQ, TokenType.ON)

    prop = self.expression(exp.WithSystemVersioningProperty)
    if self._match(TokenType.L_PAREN):
        self._match_text_seq("HISTORY_TABLE", "=")
        prop.set("this", self._parse_table_parts())

        if self._match(TokenType.COMMA):
            self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
            prop.set("expression", self._advance_any() and self._prev.text.upper())

        self._match_r_paren()

    return prop
def _parse_with_property(
    self,
) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
    """Parse the payload of a WITH property clause."""
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_property)

    if self._match_text_seq("JOURNAL"):
        return self._parse_withjournaltable()

    if self._match_text_seq("DATA"):
        return self._parse_withdata(no=False)
    if self._match_text_seq("NO", "DATA"):
        return self._parse_withdata(no=True)

    return self._parse_withisolatedloading() if self._next else None

# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """Parse DEFINER = user@host; None when either side is missing."""
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)
    host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None

def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
    """Parse WITH JOURNAL [TABLE] [=] <table>."""
    self._match(TokenType.TABLE)
    self._match(TokenType.EQ)
    return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

def _parse_log(self, no: bool = False) -> exp.LogProperty:
    """Parse a [NO] LOG property."""
    return self.expression(exp.LogProperty, no=no)

def _parse_journal(self, **kwargs) -> exp.JournalProperty:
    """Parse a JOURNAL property from the flags collected by the caller."""
    return self.expression(exp.JournalProperty, **kwargs)

def _parse_checksum(self) -> exp.ChecksumProperty:
    """Parse CHECKSUM [=] { ON | OFF } [DEFAULT]."""
    self._match(TokenType.EQ)

    if self._match(TokenType.ON):
        on: t.Optional[bool] = True
    elif self._match_text_seq("OFF"):
        on = False
    else:
        on = None

    return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
    """Parse a CLUSTER BY list, optionally wrapped in parentheses."""
    parse_list = self._parse_wrapped_csv if wrapped else self._parse_csv
    return self.expression(exp.Cluster, expressions=parse_list(self._parse_ordered))
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """Parse CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS."""
    self._match_text_seq("BY")

    self._match_l_paren()
    columns = self._parse_csv(self._parse_column)
    self._match_r_paren()

    sorted_by = None
    if self._match_text_seq("SORTED", "BY"):
        self._match_l_paren()
        sorted_by = self._parse_csv(self._parse_ordered)
        self._match_r_paren()

    self._match(TokenType.INTO)
    buckets = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=columns,
        sorted_by=sorted_by,
        buckets=buckets,
    )

def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
    """Parse COPY GRANTS; rewinds the COPY token when GRANTS does not follow."""
    if self._match_text_seq("GRANTS"):
        return self.expression(exp.CopyGrantsProperty)

    # Not COPY GRANTS: back off the token that dispatched us here
    self._retreat(self._index - 1)
    return None

def _parse_freespace(self) -> exp.FreespaceProperty:
    """Parse FREESPACE [=] <number> [PERCENT]."""
    self._match(TokenType.EQ)
    amount = self._parse_number()
    return self.expression(
        exp.FreespaceProperty, this=amount, percent=self._match(TokenType.PERCENT)
    )

def _parse_mergeblockratio(
    self, no: bool = False, default: bool = False
) -> exp.MergeBlockRatioProperty:
    """Parse MERGEBLOCKRATIO, either `= <n> [PERCENT]` or the NO/DEFAULT flag form."""
    if not self._match(TokenType.EQ):
        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    return self.expression(
        exp.MergeBlockRatioProperty,
        this=self._parse_number(),
        percent=self._match(TokenType.PERCENT),
    )

def _parse_datablocksize(
    self,
    default: t.Optional[bool] = None,
    minimum: t.Optional[bool] = None,
    maximum: t.Optional[bool] = None,
) -> exp.DataBlocksizeProperty:
    """Parse DATABLOCKSIZE [=] <n> [BYTES|KBYTES|KILOBYTES]."""
    self._match(TokenType.EQ)
    size = self._parse_number()

    units = self._prev.text if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")) else None

    return self.expression(
        exp.DataBlocksizeProperty,
        size=size,
        units=units,
        default=default,
        minimum=minimum,
        maximum=maximum,
    )
def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
    """Parse BLOCKCOMPRESSION [=] {ALWAYS|MANUAL|NEVER|DEFAULT} [AUTOTEMP (...)]."""
    self._match(TokenType.EQ)

    # Each flag consumes tokens in order; at most one is expected to match.
    always = self._match_text_seq("ALWAYS")
    manual = self._match_text_seq("MANUAL")
    never = self._match_text_seq("NEVER")
    default = self._match_text_seq("DEFAULT")
    autotemp = self._parse_schema() if self._match_text_seq("AUTOTEMP") else None

    return self.expression(
        exp.BlockCompressionProperty,
        always=always,
        manual=manual,
        never=never,
        default=default,
        autotemp=autotemp,
    )

def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
    """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL|INSERT|NONE}]."""
    no = self._match_text_seq("NO")
    concurrent = self._match_text_seq("CONCURRENT")
    self._match_text_seq("ISOLATED", "LOADING")
    # Keyword arguments are evaluated left-to-right, so the FOR variants are
    # matched in exactly this order.
    return self.expression(
        exp.IsolatedLoadingProperty,
        no=no,
        concurrent=concurrent,
        for_all=self._match_text_seq("FOR", "ALL"),
        for_insert=self._match_text_seq("FOR", "INSERT"),
        for_none=self._match_text_seq("FOR", "NONE"),
    )

def _parse_locking(self) -> exp.LockingProperty:
    """Parse a Teradata-style LOCKING clause (kind, target, FOR/IN, lock type, OVERRIDE)."""
    kind: t.Optional[str] = None
    for token_type, kind_name in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = kind_name
            break
    else:
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # Only named objects (not ROW) carry an explicit target
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    if self._match(TokenType.FOR):
        for_or_in: t.Optional[str] = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"
    else:
        for_or_in = None

    if self._match_text_seq("ACCESS"):
        lock_type: t.Optional[str] = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        lock_type = "EXCLUSIVE"
    elif self._match_text_seq("SHARE"):
        lock_type = "SHARE"
    elif self._match_text_seq("READ"):
        lock_type = "READ"
    elif self._match_text_seq("WRITE"):
        lock_type = "WRITE"
    elif self._match_text_seq("CHECKSUM"):
        lock_type = "CHECKSUM"
    else:
        lock_type = None

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=self._match_text_seq("OVERRIDE"),
    )
def _parse_partition_by(self) -> t.List[exp.Expression]:
    """Parse PARTITION BY <expr>, ...; returns [] when the clause is absent."""
    if self._match(TokenType.PARTITION_BY):
        return self._parse_csv(self._parse_conjunction)
    return []

def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
    """Parse a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

    def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
        # MINVALUE / MAXVALUE are special keywords here, not ordinary expressions
        if self._match_text_seq("MINVALUE"):
            return exp.var("MINVALUE")
        if self._match_text_seq("MAXVALUE"):
            return exp.var("MAXVALUE")
        return self._parse_bitwise()

    this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
    expression = None
    from_expressions = None
    to_expressions = None

    if self._match(TokenType.IN):
        this = self._parse_wrapped_csv(self._parse_bitwise)
    elif self._match(TokenType.FROM):
        from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        self._match_text_seq("TO")
        to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
    elif self._match_text_seq("WITH", "(", "MODULUS"):
        # Hash partitioning: `this` holds the modulus, `expression` the remainder
        this = self._parse_number()
        self._match_text_seq(",", "REMAINDER")
        expression = self._parse_number()
        self._match_r_paren()
    else:
        self.raise_error("Failed to parse partition bound spec.")

    return self.expression(
        exp.PartitionBoundSpec,
        this=this,
        expression=expression,
        from_expressions=from_expressions,
        to_expressions=to_expressions,
    )

# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """Parse PARTITION OF <table> { FOR VALUES <bound-spec> | DEFAULT }."""
    if not self._match_text_seq("OF"):
        # Rewind the PARTITION token that dispatched us here
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    if self._match(TokenType.DEFAULT):
        expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
    """Parse PARTITIONED BY [=] { (schema) | <field> }."""
    self._match(TokenType.EQ)
    target = self._parse_schema() or self._parse_bracket(self._parse_field())
    return self.expression(exp.PartitionedByProperty, this=target)

def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
    """Parse the optional [AND [NO] STATISTICS] suffix of WITH [NO] DATA."""
    statistics: t.Optional[bool] = None
    if self._match_text_seq("AND", "STATISTICS"):
        statistics = True
    elif self._match_text_seq("AND", "NO", "STATISTICS"):
        statistics = False

    return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
    """Parse CONTAINS SQL."""
    return (
        self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        if self._match_text_seq("SQL")
        else None
    )

def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
    """Parse MODIFIES SQL DATA."""
    if not self._match_text_seq("SQL", "DATA"):
        return None
    return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")

def _parse_no_property(self) -> t.Optional[exp.Expression]:
    """Parse NO PRIMARY INDEX or NO SQL."""
    if self._match_text_seq("PRIMARY", "INDEX"):
        return exp.NoPrimaryIndexProperty()
    if self._match_text_seq("SQL"):
        return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
    return None

def _parse_on_property(self) -> t.Optional[exp.Expression]:
    """Parse ON COMMIT {PRESERVE|DELETE} ROWS, or a generic ON <schema> property."""
    if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
        return exp.OnCommitProperty()
    if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
        return exp.OnCommitProperty(delete=True)
    return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
self._match_text_seq("SQL", "DATA"): 1959 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1960 return None 1961 1962 def _parse_distkey(self) -> exp.DistKeyProperty: 1963 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1964 1965 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1966 table = self._parse_table(schema=True) 1967 1968 options = [] 1969 while self._match_texts(("INCLUDING", "EXCLUDING")): 1970 this = self._prev.text.upper() 1971 1972 id_var = self._parse_id_var() 1973 if not id_var: 1974 return None 1975 1976 options.append( 1977 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1978 ) 1979 1980 return self.expression(exp.LikeProperty, this=table, expressions=options) 1981 1982 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1983 return self.expression( 1984 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1985 ) 1986 1987 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1988 self._match(TokenType.EQ) 1989 return self.expression( 1990 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1991 ) 1992 1993 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1994 self._match_text_seq("WITH", "CONNECTION") 1995 return self.expression( 1996 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1997 ) 1998 1999 def _parse_returns(self) -> exp.ReturnsProperty: 2000 value: t.Optional[exp.Expression] 2001 is_table = self._match(TokenType.TABLE) 2002 2003 if is_table: 2004 if self._match(TokenType.LT): 2005 value = self.expression( 2006 exp.Schema, 2007 this="TABLE", 2008 expressions=self._parse_csv(self._parse_struct_types), 2009 ) 2010 if not self._match(TokenType.GT): 2011 self.raise_error("Expecting >") 2012 else: 2013 value = self._parse_schema(exp.var("TABLE")) 2014 else: 2015 value = self._parse_types() 
2016 2017 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2018 2019 def _parse_describe(self) -> exp.Describe: 2020 kind = self._match_set(self.CREATABLES) and self._prev.text 2021 extended = self._match_text_seq("EXTENDED") 2022 this = self._parse_table(schema=True) 2023 properties = self._parse_properties() 2024 expressions = properties.expressions if properties else None 2025 return self.expression( 2026 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2027 ) 2028 2029 def _parse_insert(self) -> exp.Insert: 2030 comments = ensure_list(self._prev_comments) 2031 overwrite = self._match(TokenType.OVERWRITE) 2032 ignore = self._match(TokenType.IGNORE) 2033 local = self._match_text_seq("LOCAL") 2034 alternative = None 2035 2036 if self._match_text_seq("DIRECTORY"): 2037 this: t.Optional[exp.Expression] = self.expression( 2038 exp.Directory, 2039 this=self._parse_var_or_string(), 2040 local=local, 2041 row_format=self._parse_row_format(match_row=True), 2042 ) 2043 else: 2044 if self._match(TokenType.OR): 2045 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2046 2047 self._match(TokenType.INTO) 2048 comments += ensure_list(self._prev_comments) 2049 self._match(TokenType.TABLE) 2050 this = self._parse_table(schema=True) 2051 2052 returning = self._parse_returning() 2053 2054 return self.expression( 2055 exp.Insert, 2056 comments=comments, 2057 this=this, 2058 by_name=self._match_text_seq("BY", "NAME"), 2059 exists=self._parse_exists(), 2060 partition=self._parse_partition(), 2061 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2062 and self._parse_conjunction(), 2063 expression=self._parse_ddl_select(), 2064 conflict=self._parse_on_conflict(), 2065 returning=returning or self._parse_returning(), 2066 overwrite=overwrite, 2067 alternative=alternative, 2068 ignore=ignore, 2069 ) 2070 2071 def _parse_kill(self) -> exp.Kill: 2072 kind = exp.var(self._prev.text) if 
self._match_texts(("CONNECTION", "QUERY")) else None 2073 2074 return self.expression( 2075 exp.Kill, 2076 this=self._parse_primary(), 2077 kind=kind, 2078 ) 2079 2080 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2081 conflict = self._match_text_seq("ON", "CONFLICT") 2082 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2083 2084 if not conflict and not duplicate: 2085 return None 2086 2087 nothing = None 2088 expressions = None 2089 key = None 2090 constraint = None 2091 2092 if conflict: 2093 if self._match_text_seq("ON", "CONSTRAINT"): 2094 constraint = self._parse_id_var() 2095 else: 2096 key = self._parse_csv(self._parse_value) 2097 2098 self._match_text_seq("DO") 2099 if self._match_text_seq("NOTHING"): 2100 nothing = True 2101 else: 2102 self._match(TokenType.UPDATE) 2103 self._match(TokenType.SET) 2104 expressions = self._parse_csv(self._parse_equality) 2105 2106 return self.expression( 2107 exp.OnConflict, 2108 duplicate=duplicate, 2109 expressions=expressions, 2110 nothing=nothing, 2111 key=key, 2112 constraint=constraint, 2113 ) 2114 2115 def _parse_returning(self) -> t.Optional[exp.Returning]: 2116 if not self._match(TokenType.RETURNING): 2117 return None 2118 return self.expression( 2119 exp.Returning, 2120 expressions=self._parse_csv(self._parse_expression), 2121 into=self._match(TokenType.INTO) and self._parse_table_part(), 2122 ) 2123 2124 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2125 if not self._match(TokenType.FORMAT): 2126 return None 2127 return self._parse_row_format() 2128 2129 def _parse_row_format( 2130 self, match_row: bool = False 2131 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2132 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2133 return None 2134 2135 if self._match_text_seq("SERDE"): 2136 this = self._parse_string() 2137 2138 serde_properties = None 2139 if 
self._match(TokenType.SERDE_PROPERTIES): 2140 serde_properties = self.expression( 2141 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2142 ) 2143 2144 return self.expression( 2145 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2146 ) 2147 2148 self._match_text_seq("DELIMITED") 2149 2150 kwargs = {} 2151 2152 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2153 kwargs["fields"] = self._parse_string() 2154 if self._match_text_seq("ESCAPED", "BY"): 2155 kwargs["escaped"] = self._parse_string() 2156 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2157 kwargs["collection_items"] = self._parse_string() 2158 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2159 kwargs["map_keys"] = self._parse_string() 2160 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2161 kwargs["lines"] = self._parse_string() 2162 if self._match_text_seq("NULL", "DEFINED", "AS"): 2163 kwargs["null"] = self._parse_string() 2164 2165 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2166 2167 def _parse_load(self) -> exp.LoadData | exp.Command: 2168 if self._match_text_seq("DATA"): 2169 local = self._match_text_seq("LOCAL") 2170 self._match_text_seq("INPATH") 2171 inpath = self._parse_string() 2172 overwrite = self._match(TokenType.OVERWRITE) 2173 self._match_pair(TokenType.INTO, TokenType.TABLE) 2174 2175 return self.expression( 2176 exp.LoadData, 2177 this=self._parse_table(schema=True), 2178 local=local, 2179 overwrite=overwrite, 2180 inpath=inpath, 2181 partition=self._parse_partition(), 2182 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2183 serde=self._match_text_seq("SERDE") and self._parse_string(), 2184 ) 2185 return self._parse_as_command(self._prev) 2186 2187 def _parse_delete(self) -> exp.Delete: 2188 # This handles MySQL's "Multiple-Table Syntax" 2189 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2190 tables = None 2191 
comments = self._prev_comments 2192 if not self._match(TokenType.FROM, advance=False): 2193 tables = self._parse_csv(self._parse_table) or None 2194 2195 returning = self._parse_returning() 2196 2197 return self.expression( 2198 exp.Delete, 2199 comments=comments, 2200 tables=tables, 2201 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2202 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2203 where=self._parse_where(), 2204 returning=returning or self._parse_returning(), 2205 limit=self._parse_limit(), 2206 ) 2207 2208 def _parse_update(self) -> exp.Update: 2209 comments = self._prev_comments 2210 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2211 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2212 returning = self._parse_returning() 2213 return self.expression( 2214 exp.Update, 2215 comments=comments, 2216 **{ # type: ignore 2217 "this": this, 2218 "expressions": expressions, 2219 "from": self._parse_from(joins=True), 2220 "where": self._parse_where(), 2221 "returning": returning or self._parse_returning(), 2222 "order": self._parse_order(), 2223 "limit": self._parse_limit(), 2224 }, 2225 ) 2226 2227 def _parse_uncache(self) -> exp.Uncache: 2228 if not self._match(TokenType.TABLE): 2229 self.raise_error("Expecting TABLE after UNCACHE") 2230 2231 return self.expression( 2232 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2233 ) 2234 2235 def _parse_cache(self) -> exp.Cache: 2236 lazy = self._match_text_seq("LAZY") 2237 self._match(TokenType.TABLE) 2238 table = self._parse_table(schema=True) 2239 2240 options = [] 2241 if self._match_text_seq("OPTIONS"): 2242 self._match_l_paren() 2243 k = self._parse_string() 2244 self._match(TokenType.EQ) 2245 v = self._parse_string() 2246 options = [k, v] 2247 self._match_r_paren() 2248 2249 self._match(TokenType.ALIAS) 2250 return self.expression( 2251 exp.Cache, 2252 this=table, 2253 
lazy=lazy, 2254 options=options, 2255 expression=self._parse_select(nested=True), 2256 ) 2257 2258 def _parse_partition(self) -> t.Optional[exp.Partition]: 2259 if not self._match(TokenType.PARTITION): 2260 return None 2261 2262 return self.expression( 2263 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2264 ) 2265 2266 def _parse_value(self) -> exp.Tuple: 2267 if self._match(TokenType.L_PAREN): 2268 expressions = self._parse_csv(self._parse_expression) 2269 self._match_r_paren() 2270 return self.expression(exp.Tuple, expressions=expressions) 2271 2272 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2273 # https://prestodb.io/docs/current/sql/values.html 2274 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2275 2276 def _parse_projections(self) -> t.List[exp.Expression]: 2277 return self._parse_expressions() 2278 2279 def _parse_select( 2280 self, 2281 nested: bool = False, 2282 table: bool = False, 2283 parse_subquery_alias: bool = True, 2284 parse_set_operation: bool = True, 2285 ) -> t.Optional[exp.Expression]: 2286 cte = self._parse_with() 2287 2288 if cte: 2289 this = self._parse_statement() 2290 2291 if not this: 2292 self.raise_error("Failed to parse any statement following CTE") 2293 return cte 2294 2295 if "with" in this.arg_types: 2296 this.set("with", cte) 2297 else: 2298 self.raise_error(f"{this.key} does not support CTE") 2299 this = cte 2300 2301 return this 2302 2303 # duckdb supports leading with FROM x 2304 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2305 2306 if self._match(TokenType.SELECT): 2307 comments = self._prev_comments 2308 2309 hint = self._parse_hint() 2310 all_ = self._match(TokenType.ALL) 2311 distinct = self._match_set(self.DISTINCT_TOKENS) 2312 2313 kind = ( 2314 self._match(TokenType.ALIAS) 2315 and self._match_texts(("STRUCT", "VALUE")) 2316 and self._prev.text.upper() 2317 ) 2318 2319 if distinct: 2320 distinct = 
self.expression( 2321 exp.Distinct, 2322 on=self._parse_value() if self._match(TokenType.ON) else None, 2323 ) 2324 2325 if all_ and distinct: 2326 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2327 2328 limit = self._parse_limit(top=True) 2329 projections = self._parse_projections() 2330 2331 this = self.expression( 2332 exp.Select, 2333 kind=kind, 2334 hint=hint, 2335 distinct=distinct, 2336 expressions=projections, 2337 limit=limit, 2338 ) 2339 this.comments = comments 2340 2341 into = self._parse_into() 2342 if into: 2343 this.set("into", into) 2344 2345 if not from_: 2346 from_ = self._parse_from() 2347 2348 if from_: 2349 this.set("from", from_) 2350 2351 this = self._parse_query_modifiers(this) 2352 elif (table or nested) and self._match(TokenType.L_PAREN): 2353 if self._match(TokenType.PIVOT): 2354 this = self._parse_simplified_pivot() 2355 elif self._match(TokenType.FROM): 2356 this = exp.select("*").from_( 2357 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2358 ) 2359 else: 2360 this = ( 2361 self._parse_table() 2362 if table 2363 else self._parse_select(nested=True, parse_set_operation=False) 2364 ) 2365 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2366 2367 self._match_r_paren() 2368 2369 # We return early here so that the UNION isn't attached to the subquery by the 2370 # following call to _parse_set_operations, but instead becomes the parent node 2371 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2372 elif self._match(TokenType.VALUES): 2373 this = self.expression( 2374 exp.Values, 2375 expressions=self._parse_csv(self._parse_value), 2376 alias=self._parse_table_alias(), 2377 ) 2378 elif from_: 2379 this = exp.select("*").from_(from_.this, copy=False) 2380 else: 2381 this = None 2382 2383 if parse_set_operation: 2384 return self._parse_set_operations(this) 2385 return this 2386 2387 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2388 if not 
skip_with_token and not self._match(TokenType.WITH): 2389 return None 2390 2391 comments = self._prev_comments 2392 recursive = self._match(TokenType.RECURSIVE) 2393 2394 expressions = [] 2395 while True: 2396 expressions.append(self._parse_cte()) 2397 2398 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2399 break 2400 else: 2401 self._match(TokenType.WITH) 2402 2403 return self.expression( 2404 exp.With, comments=comments, expressions=expressions, recursive=recursive 2405 ) 2406 2407 def _parse_cte(self) -> exp.CTE: 2408 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2409 if not alias or not alias.this: 2410 self.raise_error("Expected CTE to have alias") 2411 2412 self._match(TokenType.ALIAS) 2413 return self.expression( 2414 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2415 ) 2416 2417 def _parse_table_alias( 2418 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2419 ) -> t.Optional[exp.TableAlias]: 2420 any_token = self._match(TokenType.ALIAS) 2421 alias = ( 2422 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2423 or self._parse_string_as_identifier() 2424 ) 2425 2426 index = self._index 2427 if self._match(TokenType.L_PAREN): 2428 columns = self._parse_csv(self._parse_function_parameter) 2429 self._match_r_paren() if columns else self._retreat(index) 2430 else: 2431 columns = None 2432 2433 if not alias and not columns: 2434 return None 2435 2436 return self.expression(exp.TableAlias, this=alias, columns=columns) 2437 2438 def _parse_subquery( 2439 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2440 ) -> t.Optional[exp.Subquery]: 2441 if not this: 2442 return None 2443 2444 return self.expression( 2445 exp.Subquery, 2446 this=this, 2447 pivots=self._parse_pivots(), 2448 alias=self._parse_table_alias() if parse_alias else None, 2449 ) 2450 2451 def _parse_query_modifiers( 2452 self, this: t.Optional[exp.Expression] 2453 ) -> 
t.Optional[exp.Expression]: 2454 if isinstance(this, self.MODIFIABLES): 2455 for join in iter(self._parse_join, None): 2456 this.append("joins", join) 2457 for lateral in iter(self._parse_lateral, None): 2458 this.append("laterals", lateral) 2459 2460 while True: 2461 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2462 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2463 key, expression = parser(self) 2464 2465 if expression: 2466 this.set(key, expression) 2467 if key == "limit": 2468 offset = expression.args.pop("offset", None) 2469 2470 if offset: 2471 offset = exp.Offset(expression=offset) 2472 this.set("offset", offset) 2473 2474 limit_by_expressions = expression.expressions 2475 expression.set("expressions", None) 2476 offset.set("expressions", limit_by_expressions) 2477 continue 2478 break 2479 return this 2480 2481 def _parse_hint(self) -> t.Optional[exp.Hint]: 2482 if self._match(TokenType.HINT): 2483 hints = [] 2484 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2485 hints.extend(hint) 2486 2487 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2488 self.raise_error("Expected */ after HINT") 2489 2490 return self.expression(exp.Hint, expressions=hints) 2491 2492 return None 2493 2494 def _parse_into(self) -> t.Optional[exp.Into]: 2495 if not self._match(TokenType.INTO): 2496 return None 2497 2498 temp = self._match(TokenType.TEMPORARY) 2499 unlogged = self._match_text_seq("UNLOGGED") 2500 self._match(TokenType.TABLE) 2501 2502 return self.expression( 2503 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2504 ) 2505 2506 def _parse_from( 2507 self, joins: bool = False, skip_from_token: bool = False 2508 ) -> t.Optional[exp.From]: 2509 if not skip_from_token and not self._match(TokenType.FROM): 2510 return None 2511 2512 return self.expression( 2513 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2514 ) 2515 2516 def 
_parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2517 if not self._match(TokenType.MATCH_RECOGNIZE): 2518 return None 2519 2520 self._match_l_paren() 2521 2522 partition = self._parse_partition_by() 2523 order = self._parse_order() 2524 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2525 2526 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2527 rows = exp.var("ONE ROW PER MATCH") 2528 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2529 text = "ALL ROWS PER MATCH" 2530 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2531 text += " SHOW EMPTY MATCHES" 2532 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2533 text += " OMIT EMPTY MATCHES" 2534 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2535 text += " WITH UNMATCHED ROWS" 2536 rows = exp.var(text) 2537 else: 2538 rows = None 2539 2540 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2541 text = "AFTER MATCH SKIP" 2542 if self._match_text_seq("PAST", "LAST", "ROW"): 2543 text += " PAST LAST ROW" 2544 elif self._match_text_seq("TO", "NEXT", "ROW"): 2545 text += " TO NEXT ROW" 2546 elif self._match_text_seq("TO", "FIRST"): 2547 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2548 elif self._match_text_seq("TO", "LAST"): 2549 text += f" TO LAST {self._advance_any().text}" # type: ignore 2550 after = exp.var(text) 2551 else: 2552 after = None 2553 2554 if self._match_text_seq("PATTERN"): 2555 self._match_l_paren() 2556 2557 if not self._curr: 2558 self.raise_error("Expecting )", self._curr) 2559 2560 paren = 1 2561 start = self._curr 2562 2563 while self._curr and paren > 0: 2564 if self._curr.token_type == TokenType.L_PAREN: 2565 paren += 1 2566 if self._curr.token_type == TokenType.R_PAREN: 2567 paren -= 1 2568 2569 end = self._prev 2570 self._advance() 2571 2572 if paren > 0: 2573 self.raise_error("Expecting )", self._curr) 2574 2575 pattern = exp.var(self._find_sql(start, end)) 2576 else: 2577 pattern = None 
2578 2579 define = ( 2580 self._parse_csv(self._parse_name_as_expression) 2581 if self._match_text_seq("DEFINE") 2582 else None 2583 ) 2584 2585 self._match_r_paren() 2586 2587 return self.expression( 2588 exp.MatchRecognize, 2589 partition_by=partition, 2590 order=order, 2591 measures=measures, 2592 rows=rows, 2593 after=after, 2594 pattern=pattern, 2595 define=define, 2596 alias=self._parse_table_alias(), 2597 ) 2598 2599 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2600 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2601 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2602 cross_apply = False 2603 2604 if cross_apply is not None: 2605 this = self._parse_select(table=True) 2606 view = None 2607 outer = None 2608 elif self._match(TokenType.LATERAL): 2609 this = self._parse_select(table=True) 2610 view = self._match(TokenType.VIEW) 2611 outer = self._match(TokenType.OUTER) 2612 else: 2613 return None 2614 2615 if not this: 2616 this = ( 2617 self._parse_unnest() 2618 or self._parse_function() 2619 or self._parse_id_var(any_token=False) 2620 ) 2621 2622 while self._match(TokenType.DOT): 2623 this = exp.Dot( 2624 this=this, 2625 expression=self._parse_function() or self._parse_id_var(any_token=False), 2626 ) 2627 2628 if view: 2629 table = self._parse_id_var(any_token=False) 2630 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2631 table_alias: t.Optional[exp.TableAlias] = self.expression( 2632 exp.TableAlias, this=table, columns=columns 2633 ) 2634 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2635 # We move the alias from the lateral's child node to the lateral itself 2636 table_alias = this.args["alias"].pop() 2637 else: 2638 table_alias = self._parse_table_alias() 2639 2640 return self.expression( 2641 exp.Lateral, 2642 this=this, 2643 view=view, 2644 outer=outer, 2645 alias=table_alias, 2646 cross_apply=cross_apply, 2647 ) 2648 2649 def 
_parse_join_parts( 2650 self, 2651 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2652 return ( 2653 self._match_set(self.JOIN_METHODS) and self._prev, 2654 self._match_set(self.JOIN_SIDES) and self._prev, 2655 self._match_set(self.JOIN_KINDS) and self._prev, 2656 ) 2657 2658 def _parse_join( 2659 self, skip_join_token: bool = False, parse_bracket: bool = False 2660 ) -> t.Optional[exp.Join]: 2661 if self._match(TokenType.COMMA): 2662 return self.expression(exp.Join, this=self._parse_table()) 2663 2664 index = self._index 2665 method, side, kind = self._parse_join_parts() 2666 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2667 join = self._match(TokenType.JOIN) 2668 2669 if not skip_join_token and not join: 2670 self._retreat(index) 2671 kind = None 2672 method = None 2673 side = None 2674 2675 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2676 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2677 2678 if not skip_join_token and not join and not outer_apply and not cross_apply: 2679 return None 2680 2681 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2682 2683 if method: 2684 kwargs["method"] = method.text 2685 if side: 2686 kwargs["side"] = side.text 2687 if kind: 2688 kwargs["kind"] = kind.text 2689 if hint: 2690 kwargs["hint"] = hint 2691 2692 if self._match(TokenType.ON): 2693 kwargs["on"] = self._parse_conjunction() 2694 elif self._match(TokenType.USING): 2695 kwargs["using"] = self._parse_wrapped_id_vars() 2696 elif not (kind and kind.token_type == TokenType.CROSS): 2697 index = self._index 2698 join = self._parse_join() 2699 2700 if join and self._match(TokenType.ON): 2701 kwargs["on"] = self._parse_conjunction() 2702 elif join and self._match(TokenType.USING): 2703 kwargs["using"] = self._parse_wrapped_id_vars() 2704 else: 2705 join = None 2706 self._retreat(index) 2707 2708 kwargs["this"].set("joins", [join] if join else 
None) 2709 2710 comments = [c for token in (method, side, kind) if token for c in token.comments] 2711 return self.expression(exp.Join, comments=comments, **kwargs) 2712 2713 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2714 this = self._parse_conjunction() 2715 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2716 return this 2717 2718 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2719 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2720 2721 return this 2722 2723 def _parse_index( 2724 self, 2725 index: t.Optional[exp.Expression] = None, 2726 ) -> t.Optional[exp.Index]: 2727 if index: 2728 unique = None 2729 primary = None 2730 amp = None 2731 2732 self._match(TokenType.ON) 2733 self._match(TokenType.TABLE) # hive 2734 table = self._parse_table_parts(schema=True) 2735 else: 2736 unique = self._match(TokenType.UNIQUE) 2737 primary = self._match_text_seq("PRIMARY") 2738 amp = self._match_text_seq("AMP") 2739 2740 if not self._match(TokenType.INDEX): 2741 return None 2742 2743 index = self._parse_id_var() 2744 table = None 2745 2746 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2747 2748 if self._match(TokenType.L_PAREN, advance=False): 2749 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2750 else: 2751 columns = None 2752 2753 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2754 2755 return self.expression( 2756 exp.Index, 2757 this=index, 2758 table=table, 2759 using=using, 2760 columns=columns, 2761 unique=unique, 2762 primary=primary, 2763 amp=amp, 2764 include=include, 2765 partition_by=self._parse_partition_by(), 2766 where=self._parse_where(), 2767 ) 2768 2769 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2770 hints: t.List[exp.Expression] = [] 2771 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2772 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2773 hints.append( 2774 self.expression( 2775 exp.WithTableHint, 2776 expressions=self._parse_csv( 2777 lambda: self._parse_function() or self._parse_var(any_token=True) 2778 ), 2779 ) 2780 ) 2781 self._match_r_paren() 2782 else: 2783 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2784 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2785 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2786 2787 self._match_texts(("INDEX", "KEY")) 2788 if self._match(TokenType.FOR): 2789 hint.set("target", self._advance_any() and self._prev.text.upper()) 2790 2791 hint.set("expressions", self._parse_wrapped_id_vars()) 2792 hints.append(hint) 2793 2794 return hints or None 2795 2796 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2797 return ( 2798 (not schema and self._parse_function(optional_parens=False)) 2799 or self._parse_id_var(any_token=False) 2800 or self._parse_string_as_identifier() 2801 or self._parse_placeholder() 2802 ) 2803 2804 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2805 catalog = None 2806 db = None 2807 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2808 2809 while self._match(TokenType.DOT): 2810 if catalog: 2811 # This allows nesting the table in arbitrarily many dot expressions if needed 2812 table = self.expression( 2813 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2814 ) 2815 else: 2816 catalog = db 2817 db = table 2818 table = self._parse_table_part(schema=schema) or "" 2819 2820 if is_db_reference: 2821 catalog = db 2822 db = table 2823 table = None 2824 2825 if not table and not is_db_reference: 2826 self.raise_error(f"Expected table name but got {self._curr}") 2827 if not db and is_db_reference: 2828 self.raise_error(f"Expected database name but got {self._curr}") 2829 2830 return 
self.expression( 2831 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2832 ) 2833 2834 def _parse_table( 2835 self, 2836 schema: bool = False, 2837 joins: bool = False, 2838 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2839 parse_bracket: bool = False, 2840 is_db_reference: bool = False, 2841 ) -> t.Optional[exp.Expression]: 2842 lateral = self._parse_lateral() 2843 if lateral: 2844 return lateral 2845 2846 unnest = self._parse_unnest() 2847 if unnest: 2848 return unnest 2849 2850 values = self._parse_derived_table_values() 2851 if values: 2852 return values 2853 2854 subquery = self._parse_select(table=True) 2855 if subquery: 2856 if not subquery.args.get("pivots"): 2857 subquery.set("pivots", self._parse_pivots()) 2858 return subquery 2859 2860 bracket = parse_bracket and self._parse_bracket(None) 2861 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2862 this = t.cast( 2863 exp.Expression, 2864 bracket 2865 or self._parse_bracket( 2866 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2867 ), 2868 ) 2869 2870 if schema: 2871 return self._parse_schema(this=this) 2872 2873 version = self._parse_version() 2874 2875 if version: 2876 this.set("version", version) 2877 2878 if self.dialect.ALIAS_POST_TABLESAMPLE: 2879 table_sample = self._parse_table_sample() 2880 2881 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2882 if alias: 2883 this.set("alias", alias) 2884 2885 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2886 return self.expression( 2887 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2888 ) 2889 2890 this.set("hints", self._parse_table_hints()) 2891 2892 if not this.args.get("pivots"): 2893 this.set("pivots", self._parse_pivots()) 2894 2895 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2896 table_sample = self._parse_table_sample() 2897 2898 if table_sample: 2899 table_sample.set("this", 
this) 2900 this = table_sample 2901 2902 if joins: 2903 for join in iter(self._parse_join, None): 2904 this.append("joins", join) 2905 2906 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2907 this.set("ordinality", True) 2908 this.set("alias", self._parse_table_alias()) 2909 2910 return this 2911 2912 def _parse_version(self) -> t.Optional[exp.Version]: 2913 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2914 this = "TIMESTAMP" 2915 elif self._match(TokenType.VERSION_SNAPSHOT): 2916 this = "VERSION" 2917 else: 2918 return None 2919 2920 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2921 kind = self._prev.text.upper() 2922 start = self._parse_bitwise() 2923 self._match_texts(("TO", "AND")) 2924 end = self._parse_bitwise() 2925 expression: t.Optional[exp.Expression] = self.expression( 2926 exp.Tuple, expressions=[start, end] 2927 ) 2928 elif self._match_text_seq("CONTAINED", "IN"): 2929 kind = "CONTAINED IN" 2930 expression = self.expression( 2931 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2932 ) 2933 elif self._match(TokenType.ALL): 2934 kind = "ALL" 2935 expression = None 2936 else: 2937 self._match_text_seq("AS", "OF") 2938 kind = "AS OF" 2939 expression = self._parse_type() 2940 2941 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2942 2943 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2944 if not self._match(TokenType.UNNEST): 2945 return None 2946 2947 expressions = self._parse_wrapped_csv(self._parse_equality) 2948 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2949 2950 alias = self._parse_table_alias() if with_alias else None 2951 2952 if alias: 2953 if self.dialect.UNNEST_COLUMN_ONLY: 2954 if alias.args.get("columns"): 2955 self.raise_error("Unexpected extra column alias in unnest.") 2956 2957 alias.set("columns", [alias.this]) 2958 alias.set("this", None) 2959 2960 columns = alias.args.get("columns") or [] 2961 if offset and 
len(expressions) < len(columns): 2962 offset = columns.pop() 2963 2964 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2965 self._match(TokenType.ALIAS) 2966 offset = self._parse_id_var( 2967 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2968 ) or exp.to_identifier("offset") 2969 2970 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2971 2972 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2973 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2974 if not is_derived and not self._match(TokenType.VALUES): 2975 return None 2976 2977 expressions = self._parse_csv(self._parse_value) 2978 alias = self._parse_table_alias() 2979 2980 if is_derived: 2981 self._match_r_paren() 2982 2983 return self.expression( 2984 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2985 ) 2986 2987 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2988 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2989 as_modifier and self._match_text_seq("USING", "SAMPLE") 2990 ): 2991 return None 2992 2993 bucket_numerator = None 2994 bucket_denominator = None 2995 bucket_field = None 2996 percent = None 2997 size = None 2998 seed = None 2999 3000 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3001 matched_l_paren = self._match(TokenType.L_PAREN) 3002 3003 if self.TABLESAMPLE_CSV: 3004 num = None 3005 expressions = self._parse_csv(self._parse_primary) 3006 else: 3007 expressions = None 3008 num = ( 3009 self._parse_factor() 3010 if self._match(TokenType.NUMBER, advance=False) 3011 else self._parse_primary() or self._parse_placeholder() 3012 ) 3013 3014 if self._match_text_seq("BUCKET"): 3015 bucket_numerator = self._parse_number() 3016 self._match_text_seq("OUT", "OF") 3017 bucket_denominator = bucket_denominator = self._parse_number() 3018 self._match(TokenType.ON) 3019 bucket_field = self._parse_field() 3020 elif 
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3021 percent = num 3022 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3023 size = num 3024 else: 3025 percent = num 3026 3027 if matched_l_paren: 3028 self._match_r_paren() 3029 3030 if self._match(TokenType.L_PAREN): 3031 method = self._parse_var(upper=True) 3032 seed = self._match(TokenType.COMMA) and self._parse_number() 3033 self._match_r_paren() 3034 elif self._match_texts(("SEED", "REPEATABLE")): 3035 seed = self._parse_wrapped(self._parse_number) 3036 3037 return self.expression( 3038 exp.TableSample, 3039 expressions=expressions, 3040 method=method, 3041 bucket_numerator=bucket_numerator, 3042 bucket_denominator=bucket_denominator, 3043 bucket_field=bucket_field, 3044 percent=percent, 3045 size=size, 3046 seed=seed, 3047 ) 3048 3049 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3050 return list(iter(self._parse_pivot, None)) or None 3051 3052 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3053 return list(iter(self._parse_join, None)) or None 3054 3055 # https://duckdb.org/docs/sql/statements/pivot 3056 def _parse_simplified_pivot(self) -> exp.Pivot: 3057 def _parse_on() -> t.Optional[exp.Expression]: 3058 this = self._parse_bitwise() 3059 return self._parse_in(this) if self._match(TokenType.IN) else this 3060 3061 this = self._parse_table() 3062 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3063 using = self._match(TokenType.USING) and self._parse_csv( 3064 lambda: self._parse_alias(self._parse_function()) 3065 ) 3066 group = self._parse_group() 3067 return self.expression( 3068 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3069 ) 3070 3071 def _parse_pivot_in(self) -> exp.In: 3072 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3073 this = self._parse_conjunction() 3074 3075 self._match(TokenType.ALIAS) 3076 alias = self._parse_field() 3077 if alias: 3078 return 
self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one PIVOT or UNPIVOT clause, or return None (token position restored).

        For PIVOT, also synthesizes the output column identifiers by combining
        aggregation aliases with the IN-list field names.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause — rewind past the PIVOT/UNPIVOT keyword
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # An alias can only follow the last pivot of a chain
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect flag decides whether the agg alias prefixes or suffixes
                    # the pivoted field name in the generated column identifier
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each aggregation; dialects may override this."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if the WHERE keyword is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, accumulating expressions, GROUPING SETS,
        ROLLUP/CUBE (with or without WITH) and WITH TOTALS into one exp.Group."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop because these sub-clauses can be interleaved and repeated
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes (col, ...)
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or
rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else (e.g. WITH TIES) — rewind
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `GROUPING SETS (...)`, or return None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if the keyword is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if the keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY, in either clause order.

        PRIOR is only a valid unary operator inside the CONNECT BY condition, so
        a parser for it is temporarily registered while that condition is parsed.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # CONNECT BY may also precede START WITH
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name [AS expr]` (alias first), as used by e.g. INTERPOLATE."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's `INTERPOLATE (...)` modifier of ORDER BY ... WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for sort-like clauses (SORT BY, CLUSTER BY, DISTRIBUTE BY)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ORDER BY term: expression, ASC/DESC, NULLS FIRST/LAST, WITH FILL.

        When null ordering is not explicit, nulls_first is derived from the
        dialect's NULL_ORDERING setting so transpilation stays deterministic.
        """
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
"to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when top=True) and FETCH FIRST/NEXT clauses.

        Supports MySQL's `LIMIT offset, count` comma form and T-SQL's
        parenthesized TOP. Returns `this` unchanged when neither clause matches.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                # TOP (expr) allows a full term; bare TOP only a number
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT a, b — the first value is actually the offset
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's `LIMIT ... BY expr, ...` suffix, if present."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each optionally with OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT branches onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given explicitly
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist query modifiers (order, limit, ...) that were parsed on the
                # right-hand SELECT up onto the UNION node itself
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Top of the expression grammar: a conjunction plus an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR combinations of equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =/!= style operators over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse </>/<=/>= style operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), including an
        optional leading NOT, plus ISNULL/NOTNULL shorthands and IS predicates."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        Rewinds and returns None when what follows IS isn't one of those, so the
        caller can treat IS as something else (e.g. an identifier).
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a (sub)query or expression
        list in parens/brackets, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under "query", not "expressions"
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def
_parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the `low AND high` tail of a BETWEEN predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows; otherwise pass through."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal into the canonical `'value' UNIT` form.

        Rewinds and returns None when the token stream doesn't form an interval
        (e.g. INTERVAL followed by what is actually an IS predicate).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, string concat (||), ?? coalescing and shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators; tags Div nodes with the dialect's
        typed/safe division semantics so generators can transpile them correctly."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse either an interval, a `TYPE 'literal'` cast shorthand, or a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals
            while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect may have a dedicated literal parser
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args was probably an identifier — rewind
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 10 in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        Handles struct/enum/aggregate parameter lists, ARRAY<...> style nesting,
        TIME/TIMESTAMP time-zone variants, INTERVAL spans, UNSIGNED integers,
        trailing [] array suffixes, and (optionally) user-defined type names.
        When check_func is True, a type name directly followed by a string is
        treated as a function call rather than a type, and parsing rewinds.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                # Re-tokenize the identifier to see if it is actually a type name
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(func_name, arg_type, ...)
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Followed by a string — treat as a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAY, once per pair
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one `name: type` / `name type` member of a STRUCT type definition."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # No type was attached — rewind and parse as a bare type instead
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if it follows; otherwise pass through."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference followed by any column operators/brackets."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field, promoting a bare identifier to an exp.Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        return this

    def
_parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing column operators (dots, ::, brackets, ...) onto `this`,
        rebuilding qualified exp.Column nodes as dotted parts accumulate."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the right-hand side must be a type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualification chain: table -> db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicit string concat chain,
        a `.123` number, or a parenthesized query/tuple/expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior)
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: first try a primary, then a function, then an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: no-paren named parsers, no-paren builtin functions,
        parenthesized special-case parsers (FUNCTION_PARSERS), subquery
        predicates (EXISTS/ANY/ALL), known functions (FUNCTIONS), and finally
        exp.Anonymous for unknown names. Returns None if no function starts here.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some builders need the dialect; detect that from their signature
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) ->
t.Optional[exp.Expression]: 4073 return self._parse_column_def(self._parse_id_var()) 4074 4075 def _parse_user_defined_function( 4076 self, kind: t.Optional[TokenType] = None 4077 ) -> t.Optional[exp.Expression]: 4078 this = self._parse_id_var() 4079 4080 while self._match(TokenType.DOT): 4081 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4082 4083 if not self._match(TokenType.L_PAREN): 4084 return this 4085 4086 expressions = self._parse_csv(self._parse_function_parameter) 4087 self._match_r_paren() 4088 return self.expression( 4089 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4090 ) 4091 4092 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4093 literal = self._parse_primary() 4094 if literal: 4095 return self.expression(exp.Introducer, this=token.text, expression=literal) 4096 4097 return self.expression(exp.Identifier, this=token.text) 4098 4099 def _parse_session_parameter(self) -> exp.SessionParameter: 4100 kind = None 4101 this = self._parse_id_var() or self._parse_primary() 4102 4103 if this and self._match(TokenType.DOT): 4104 kind = this.name 4105 this = self._parse_var() or self._parse_primary() 4106 4107 return self.expression(exp.SessionParameter, this=this, kind=kind) 4108 4109 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4110 index = self._index 4111 4112 if self._match(TokenType.L_PAREN): 4113 expressions = t.cast( 4114 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4115 ) 4116 4117 if not self._match(TokenType.R_PAREN): 4118 self._retreat(index) 4119 else: 4120 expressions = [self._parse_id_var()] 4121 4122 if self._match_set(self.LAMBDAS): 4123 return self.LAMBDAS[self._prev.token_type](self, expressions) 4124 4125 self._retreat(index) 4126 4127 this: t.Optional[exp.Expression] 4128 4129 if self._match(TokenType.DISTINCT): 4130 this = self.expression( 4131 exp.Distinct, 
expressions=self._parse_csv(self._parse_conjunction) 4132 ) 4133 else: 4134 this = self._parse_select_or_expression(alias=alias) 4135 4136 return self._parse_limit( 4137 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4138 ) 4139 4140 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4141 index = self._index 4142 4143 if not self.errors: 4144 try: 4145 if self._parse_select(nested=True): 4146 return this 4147 except ParseError: 4148 pass 4149 finally: 4150 self.errors.clear() 4151 self._retreat(index) 4152 4153 if not self._match(TokenType.L_PAREN): 4154 return this 4155 4156 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4157 4158 self._match_r_paren() 4159 return self.expression(exp.Schema, this=this, expressions=args) 4160 4161 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4162 return self._parse_column_def(self._parse_field(any_token=True)) 4163 4164 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4165 # column defs are not really columns, they're identifiers 4166 if isinstance(this, exp.Column): 4167 this = this.this 4168 4169 kind = self._parse_types(schema=True) 4170 4171 if self._match_text_seq("FOR", "ORDINALITY"): 4172 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4173 4174 constraints: t.List[exp.Expression] = [] 4175 4176 if not kind and self._match(TokenType.ALIAS): 4177 constraints.append( 4178 self.expression( 4179 exp.ComputedColumnConstraint, 4180 this=self._parse_conjunction(), 4181 persisted=self._match_text_seq("PERSISTED"), 4182 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4183 ) 4184 ) 4185 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4186 self._match(TokenType.ALIAS) 4187 constraints.append( 4188 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4189 ) 4190 4191 while True: 
constraint = self._parse_column_constraint()
                # (continues the constraint loop of _parse_column_def)
                if not constraint:
                    break
                constraints.append(constraint)

        if not kind and not constraints:
            # Bare identifier: nothing to wrap in a ColumnDef.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; backtracks if REFRESH doesn't follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | ROW | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # e.g. GENERATED ALWAYS AS ROW START/END [HIDDEN] (system versioning)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed value, not identity.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <value>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed if CONSTRAINT is absent."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the constraint keyword at the cursor."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if
self._match(TokenType.ON):
                # (continues _parse_key_constraint_options: ON <event> <action>)
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table>[(cols)] plus key-constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        # Referenced columns are picked up by _parse_table(schema=True).
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-word action, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col); backtracks on no match."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside brackets/braces, allowing alias and slice forms."""
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts / array literals or `{...}` struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a `start:end` slice if a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # Disambiguate `... ELSE interval END` where END was consumed as an
            # interval unit rather than the CASE terminator.
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call IF(...) or keyword form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A statement-leading IF is a command in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; backtracks otherwise."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr) — or comma-separated args in some dialects."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt =
None
        # (continues _parse_cast: `fmt = None`)
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type becomes STR_TO_DATE/STR_TO_TIME
                # with the format translated through the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name — treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregates into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # DECODE treats NULL = NULL as a match, so emulate with IS NULL on both sides.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] key VALUE value` / `key: value` pair for JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson if followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) bodies."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) ->
exp.JSONSchema:
        # (continues _parse_json_schema: COLUMNS (<json column defs>))
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(expr [, path] [error/empty handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('query' [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type [path] [AS JSON]` entry of the WITH clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or comma-argument variants."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse BigQuery ML.PREDICT(MODEL ..., TABLE ... [, params])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint such as BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM, the first operand is the trim characters, not the target.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause: a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the matching keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `HAVING {MAX | MIN} <column>` after an aggregate argument."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window syntax (FILTER, WITHIN GROUP, OVER ...) that follows `this`.

        Args:
            this: The expression (usually a function call) the window applies to.
            alias: True when parsing a named window of a WINDOW clause,
                i.e. `name AS (<spec>)`, rather than `... OVER (<spec>)`.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the modifier so it wraps the aggregate instead of its argument
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name>: a reference to a named window
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary of a window spec (e.g. UNBOUNDED PRECEDING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an (optionally AS-prefixed) alias that follows `this`.

        Args:
            explicit: If True, only treat what follows as an alias when the
                AS keyword is present.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier expression.

        Args:
            any_token: If True, accept any non-reserved token as an identifier.
            tokens: Overrides the default set of acceptable identifier tokens.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword token into a Var expression.

        Args:
            any_token: If True, accept any non-reserved token.
            tokens: Extra token types that should also be accepted.
            upper: If True, uppercase the resulting variable name.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a variable or a string literal."""
        return self._parse_var() or self._parse_string()
    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference such as `{name}` or `{name: kind}`."""

        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            # A parameter part may be an identifier, a literal, or a bare word
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder if one is registered for the current token."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The registered parser declined, so give the token back
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `EXCEPT (<cols>)` / `EXCEPT <col>` of a SELECT * modifier."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `REPLACE (<exprs>)` / `REPLACE <expr>` of a SELECT * modifier."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments trailing the separator to the previous item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a chain of binary operators mapped in `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse `(<id>, <id>, ...)`; parentheses may be optional."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized separated list; parentheses may be optional."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses, required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a full SELECT or a plain (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with an optional kind and mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, e.g. READ ONLY
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK with optional savepoint and chain clauses."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was already consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name or string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse `ADD [COLUMN] [IF NOT EXISTS] <col def> [FIRST | AFTER <col>]`."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a `DROP COLUMN ...` action of an ALTER TABLE."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse `DROP [IF EXISTS] PARTITION <spec>, ...`."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        # The triggering token was already consumed by the caller
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED") or False

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of an ALTER TABLE statement."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse `ALTER [COLUMN] <col> ...` (default, comment, type, etc.)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Remaining form: [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of an ALTER TABLE (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse `RENAME COLUMN <old> TO <new>` or `RENAME TO <table>`."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; anything unrecognized degrades to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if the whole statement was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse `MERGE INTO <target> USING <source> ON <cond> WHEN ...`."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )
    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED ... THEN ... clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or neither
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via registered parsers, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `SET [GLOBAL] TRANSACTION <characteristics>`."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement via registered parsers."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET/UNSET; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of `options` (each possibly multi-word) and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement as an opaque Command expression."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the raw SQL into the leading keyword and everything after it
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form `<this>(<kind>[(<settings>)])`."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range of the form `<this>([MIN <min>] MAX <max>)`."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # Without an explicit MIN, the range implicitly starts at 0
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `<expr> for <x> in <iter> [if <cond>]` comprehension syntax."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Not a comprehension after all; rewind (one extra to undo `this`)
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, including the `$tag$ ... $tag$` form."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The tag must immediately follow the opening `$` with no whitespace
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a parser keyed by the (possibly multi-word) upcoming tokens.

        Consumes the matched tokens on success; rewinds and returns None otherwise.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True when the current token matches `token_type`; advances
        # past it (and attaches pending comments to `expression`) by default.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in the collection `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Matches two consecutive tokens; advances past both by default.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Requires an opening paren; raises a ParseError otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Requires a closing paren; raises a ParseError otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive single-token text match against the set `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitive match of a sequence of token texts; rewinds fully
        # on a partial match (and also on success when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        # Recursively rewrites Column nodes as nested Dot nodes (table.column),
        # for contexts where a column reference actually denotes a path.
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Rewrites references to lambda parameters so they are not treated as
        # real columns, e.g. `x` in `x -> x.a`.
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from alternating key/value arguments.

    A single `*` argument produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate: key, value, key, value, ...
    keys = []
    values = []
    for ix in range(0, len(args), 2):
        keys.append(args[ix])
        values.append(args[ix + 1])

    return exp.VarMap(
        keys=exp.array(*keys, copy=False),
        values=exp.array(*values, copy=False),
    )
def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) node from LOG(...) args, honoring dialect argument order."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if not dialect.LOG_BASE_FIRST:
            base, value = value, base
        return exp.Log(this=base, expression=value)

    # Single-argument LOG: some dialects define it as the natural logarithm
    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=base)
def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return an arg-list parser that builds `expr_type` from (json, path, *extra)."""

    def _parser(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSON_EXTRACT accepts additional paths beyond the first
        if len(args) > 2 and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        return node

    return _parser
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps an upper-cased function name to a callable that builds its Expression
    # from the parsed argument list. Entries seeded from exp.FUNCTION_BY_NAME may
    # be overridden below for functions that need custom argument handling.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB(pattern, value) stores the value as `this` and the pattern as
        # `expression`, hence the swapped argument order.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": parse_like,
        "LOG": parse_logarithm,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Casts to TEXT and keeps only the first 10 chars, i.e. 'YYYY-MM-DD'.
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without trailing parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # Fix: previously mapped to exp.CurrentDate, which collapsed
        # CURRENT_DATETIME into a date-only expression on round-trip.
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    # Types that can contain other types, e.g. ARRAY<INT> or MAP<TEXT, INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # All tokens that can start a data type in a cast / column definition.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    # Quantifiers that can precede a subquery, e.g. x = ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Object kinds that can follow CREATE/DROP at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END would be ambiguous with the END of a CASE expression.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens minus those that would be ambiguous after a table, e.g.
    # in "FROM t LEFT ..." the LEFT starts a join rather than naming an alias.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by '(' and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Operator-precedence tables: each maps a token to the binary Expression it
    # produces, from loosest (CONJUNCTION) to tightest (EXPONENT) binding.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty by default; dialects with an exponent operator (e.g. ** or ^) fill this in.
    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: "x -> expr" produces exp.Lambda, "x => expr" a keyword argument.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column reference (casts, JSON access, ...).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Dispatch table used by parse_into: maps a target Expression type to the
    # parser method that can produce it.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the first token of a statement to its dedicated parser method.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # Maps a literal's token to a builder that receives the token itself.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    # Infix predicates such as BETWEEN, IN, LIKE; each receives the left operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keywords (keyed by upper-cased text) to their parser methods.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint keywords to their parser methods.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Constructs that look like functions but take no parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with non-standard argument syntax that need a dedicated parser.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps a modifier's leading token to (modifier key, parsed expression).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects with SHOW statements populate this.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether `::` casts use exp.Cast (strict) rather than exp.TryCast.
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether a single-argument LOG defaults to the natural logarithm.
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether or not string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # parses no parenthesis if statements as commands
    NO_PAREN_IF_COMMANDS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Local import to avoid a circular dependency between parser and dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Resets all per-parse state so the instance can be reused for a new SQL string."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type it was attempting, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed: surface all collected errors together.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) statement chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed by the parser.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escapes underline the offending span in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no comments are given explicitly, attach any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the comments buffered from the previous token, consuming them.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # True when the previous and current tokens are adjacent (no whitespace between).
        # NOTE(review): returns a falsy Token/None rather than False when either
        # token is missing — callers appear to use it only in boolean context.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward (or backward, for negative `times`) and
        # refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1253 ) 1254 1255 def _parse_command(self) -> exp.Command: 1256 self._warn_unsupported() 1257 return self.expression( 1258 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1259 ) 1260 1261 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1262 start = self._prev 1263 exists = self._parse_exists() if allow_exists else None 1264 1265 self._match(TokenType.ON) 1266 1267 kind = self._match_set(self.CREATABLES) and self._prev 1268 if not kind: 1269 return self._parse_as_command(start) 1270 1271 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1272 this = self._parse_user_defined_function(kind=kind.token_type) 1273 elif kind.token_type == TokenType.TABLE: 1274 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1275 elif kind.token_type == TokenType.COLUMN: 1276 this = self._parse_column() 1277 else: 1278 this = self._parse_id_var() 1279 1280 self._match(TokenType.IS) 1281 1282 return self.expression( 1283 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1284 ) 1285 1286 def _parse_to_table( 1287 self, 1288 ) -> exp.ToTableProperty: 1289 table = self._parse_table_parts(schema=True) 1290 return self.expression(exp.ToTableProperty, this=table) 1291 1292 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1293 def _parse_ttl(self) -> exp.Expression: 1294 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1295 this = self._parse_bitwise() 1296 1297 if self._match_text_seq("DELETE"): 1298 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1299 if self._match_text_seq("RECOMPRESS"): 1300 return self.expression( 1301 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1302 ) 1303 if self._match_text_seq("TO", "DISK"): 1304 return self.expression( 1305 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1306 ) 1307 if self._match_text_seq("TO", "VOLUME"): 1308 
return self.expression( 1309 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1310 ) 1311 1312 return this 1313 1314 expressions = self._parse_csv(_parse_ttl_action) 1315 where = self._parse_where() 1316 group = self._parse_group() 1317 1318 aggregates = None 1319 if group and self._match(TokenType.SET): 1320 aggregates = self._parse_csv(self._parse_set_item) 1321 1322 return self.expression( 1323 exp.MergeTreeTTL, 1324 expressions=expressions, 1325 where=where, 1326 group=group, 1327 aggregates=aggregates, 1328 ) 1329 1330 def _parse_statement(self) -> t.Optional[exp.Expression]: 1331 if self._curr is None: 1332 return None 1333 1334 if self._match_set(self.STATEMENT_PARSERS): 1335 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1336 1337 if self._match_set(Tokenizer.COMMANDS): 1338 return self._parse_command() 1339 1340 expression = self._parse_expression() 1341 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1342 return self._parse_query_modifiers(expression) 1343 1344 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1345 start = self._prev 1346 temporary = self._match(TokenType.TEMPORARY) 1347 materialized = self._match_text_seq("MATERIALIZED") 1348 1349 kind = self._match_set(self.CREATABLES) and self._prev.text 1350 if not kind: 1351 return self._parse_as_command(start) 1352 1353 return self.expression( 1354 exp.Drop, 1355 comments=start.comments, 1356 exists=exists or self._parse_exists(), 1357 this=self._parse_table( 1358 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1359 ), 1360 kind=kind, 1361 temporary=temporary, 1362 materialized=materialized, 1363 cascade=self._match_text_seq("CASCADE"), 1364 constraints=self._match_text_seq("CONSTRAINTS"), 1365 purge=self._match_text_seq("PURGE"), 1366 ) 1367 1368 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1369 return ( 1370 self._match_text_seq("IF") 1371 and (not not_ or 
self._match(TokenType.NOT)) 1372 and self._match(TokenType.EXISTS) 1373 ) 1374 1375 def _parse_create(self) -> exp.Create | exp.Command: 1376 # Note: this can't be None because we've matched a statement parser 1377 start = self._prev 1378 comments = self._prev_comments 1379 1380 replace = ( 1381 start.token_type == TokenType.REPLACE 1382 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1383 or self._match_pair(TokenType.OR, TokenType.ALTER) 1384 ) 1385 unique = self._match(TokenType.UNIQUE) 1386 1387 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1388 self._advance() 1389 1390 properties = None 1391 create_token = self._match_set(self.CREATABLES) and self._prev 1392 1393 if not create_token: 1394 # exp.Properties.Location.POST_CREATE 1395 properties = self._parse_properties() 1396 create_token = self._match_set(self.CREATABLES) and self._prev 1397 1398 if not properties or not create_token: 1399 return self._parse_as_command(start) 1400 1401 exists = self._parse_exists(not_=True) 1402 this = None 1403 expression: t.Optional[exp.Expression] = None 1404 indexes = None 1405 no_schema_binding = None 1406 begin = None 1407 end = None 1408 clone = None 1409 1410 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1411 nonlocal properties 1412 if properties and temp_props: 1413 properties.expressions.extend(temp_props.expressions) 1414 elif temp_props: 1415 properties = temp_props 1416 1417 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1418 this = self._parse_user_defined_function(kind=create_token.token_type) 1419 1420 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1421 extend_props(self._parse_properties()) 1422 1423 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1424 1425 if not expression: 1426 if self._match(TokenType.COMMAND): 1427 expression = self._parse_as_command(self._prev) 1428 else: 1429 begin = self._match(TokenType.BEGIN) 1430 
return_ = self._match_text_seq("RETURN") 1431 1432 if self._match(TokenType.STRING, advance=False): 1433 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1434 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1435 expression = self._parse_string() 1436 extend_props(self._parse_properties()) 1437 else: 1438 expression = self._parse_statement() 1439 1440 end = self._match_text_seq("END") 1441 1442 if return_: 1443 expression = self.expression(exp.Return, this=expression) 1444 elif create_token.token_type == TokenType.INDEX: 1445 this = self._parse_index(index=self._parse_id_var()) 1446 elif create_token.token_type in self.DB_CREATABLES: 1447 table_parts = self._parse_table_parts( 1448 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1449 ) 1450 1451 # exp.Properties.Location.POST_NAME 1452 self._match(TokenType.COMMA) 1453 extend_props(self._parse_properties(before=True)) 1454 1455 this = self._parse_schema(this=table_parts) 1456 1457 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1458 extend_props(self._parse_properties()) 1459 1460 self._match(TokenType.ALIAS) 1461 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1462 # exp.Properties.Location.POST_ALIAS 1463 extend_props(self._parse_properties()) 1464 1465 expression = self._parse_ddl_select() 1466 1467 if create_token.token_type == TokenType.TABLE: 1468 # exp.Properties.Location.POST_EXPRESSION 1469 extend_props(self._parse_properties()) 1470 1471 indexes = [] 1472 while True: 1473 index = self._parse_index() 1474 1475 # exp.Properties.Location.POST_INDEX 1476 extend_props(self._parse_properties()) 1477 1478 if not index: 1479 break 1480 else: 1481 self._match(TokenType.COMMA) 1482 indexes.append(index) 1483 elif create_token.token_type == TokenType.VIEW: 1484 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1485 no_schema_binding = True 1486 1487 shallow = 
self._match_text_seq("SHALLOW") 1488 1489 if self._match_texts(self.CLONE_KEYWORDS): 1490 copy = self._prev.text.lower() == "copy" 1491 clone = self.expression( 1492 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1493 ) 1494 1495 if self._curr: 1496 return self._parse_as_command(start) 1497 1498 return self.expression( 1499 exp.Create, 1500 comments=comments, 1501 this=this, 1502 kind=create_token.text.upper(), 1503 replace=replace, 1504 unique=unique, 1505 expression=expression, 1506 exists=exists, 1507 properties=properties, 1508 indexes=indexes, 1509 no_schema_binding=no_schema_binding, 1510 begin=begin, 1511 end=end, 1512 clone=clone, 1513 ) 1514 1515 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1516 # only used for teradata currently 1517 self._match(TokenType.COMMA) 1518 1519 kwargs = { 1520 "no": self._match_text_seq("NO"), 1521 "dual": self._match_text_seq("DUAL"), 1522 "before": self._match_text_seq("BEFORE"), 1523 "default": self._match_text_seq("DEFAULT"), 1524 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1525 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1526 "after": self._match_text_seq("AFTER"), 1527 "minimum": self._match_texts(("MIN", "MINIMUM")), 1528 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1529 } 1530 1531 if self._match_texts(self.PROPERTY_PARSERS): 1532 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1533 try: 1534 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1535 except TypeError: 1536 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1537 1538 return None 1539 1540 def _parse_property(self) -> t.Optional[exp.Expression]: 1541 if self._match_texts(self.PROPERTY_PARSERS): 1542 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1543 1544 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1545 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1546 1547 if 
self._match_text_seq("COMPOUND", "SORTKEY"): 1548 return self._parse_sortkey(compound=True) 1549 1550 if self._match_text_seq("SQL", "SECURITY"): 1551 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1552 1553 index = self._index 1554 key = self._parse_column() 1555 1556 if not self._match(TokenType.EQ): 1557 self._retreat(index) 1558 return None 1559 1560 return self.expression( 1561 exp.Property, 1562 this=key.to_dot() if isinstance(key, exp.Column) else key, 1563 value=self._parse_column() or self._parse_var(any_token=True), 1564 ) 1565 1566 def _parse_stored(self) -> exp.FileFormatProperty: 1567 self._match(TokenType.ALIAS) 1568 1569 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1570 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1571 1572 return self.expression( 1573 exp.FileFormatProperty, 1574 this=( 1575 self.expression( 1576 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1577 ) 1578 if input_format or output_format 1579 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1580 ), 1581 ) 1582 1583 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1584 self._match(TokenType.EQ) 1585 self._match(TokenType.ALIAS) 1586 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1587 1588 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1589 properties = [] 1590 while True: 1591 if before: 1592 prop = self._parse_property_before() 1593 else: 1594 prop = self._parse_property() 1595 1596 if not prop: 1597 break 1598 for p in ensure_list(prop): 1599 properties.append(p) 1600 1601 if properties: 1602 return self.expression(exp.Properties, expressions=properties) 1603 1604 return None 1605 1606 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1607 return self.expression( 1608 
exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1609 ) 1610 1611 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1612 if self._index >= 2: 1613 pre_volatile_token = self._tokens[self._index - 2] 1614 else: 1615 pre_volatile_token = None 1616 1617 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1618 return exp.VolatileProperty() 1619 1620 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1621 1622 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1623 self._match_pair(TokenType.EQ, TokenType.ON) 1624 1625 prop = self.expression(exp.WithSystemVersioningProperty) 1626 if self._match(TokenType.L_PAREN): 1627 self._match_text_seq("HISTORY_TABLE", "=") 1628 prop.set("this", self._parse_table_parts()) 1629 1630 if self._match(TokenType.COMMA): 1631 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1632 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1633 1634 self._match_r_paren() 1635 1636 return prop 1637 1638 def _parse_with_property( 1639 self, 1640 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1641 if self._match(TokenType.L_PAREN, advance=False): 1642 return self._parse_wrapped_csv(self._parse_property) 1643 1644 if self._match_text_seq("JOURNAL"): 1645 return self._parse_withjournaltable() 1646 1647 if self._match_text_seq("DATA"): 1648 return self._parse_withdata(no=False) 1649 elif self._match_text_seq("NO", "DATA"): 1650 return self._parse_withdata(no=True) 1651 1652 if not self._next: 1653 return None 1654 1655 return self._parse_withisolatedloading() 1656 1657 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1658 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1659 self._match(TokenType.EQ) 1660 1661 user = self._parse_id_var() 1662 self._match(TokenType.PARAMETER) 1663 host = self._parse_id_var() or (self._match(TokenType.MOD) and 
self._prev.text) 1664 1665 if not user or not host: 1666 return None 1667 1668 return exp.DefinerProperty(this=f"{user}@{host}") 1669 1670 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1671 self._match(TokenType.TABLE) 1672 self._match(TokenType.EQ) 1673 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1674 1675 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1676 return self.expression(exp.LogProperty, no=no) 1677 1678 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1679 return self.expression(exp.JournalProperty, **kwargs) 1680 1681 def _parse_checksum(self) -> exp.ChecksumProperty: 1682 self._match(TokenType.EQ) 1683 1684 on = None 1685 if self._match(TokenType.ON): 1686 on = True 1687 elif self._match_text_seq("OFF"): 1688 on = False 1689 1690 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1691 1692 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1693 return self.expression( 1694 exp.Cluster, 1695 expressions=( 1696 self._parse_wrapped_csv(self._parse_ordered) 1697 if wrapped 1698 else self._parse_csv(self._parse_ordered) 1699 ), 1700 ) 1701 1702 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1703 self._match_text_seq("BY") 1704 1705 self._match_l_paren() 1706 expressions = self._parse_csv(self._parse_column) 1707 self._match_r_paren() 1708 1709 if self._match_text_seq("SORTED", "BY"): 1710 self._match_l_paren() 1711 sorted_by = self._parse_csv(self._parse_ordered) 1712 self._match_r_paren() 1713 else: 1714 sorted_by = None 1715 1716 self._match(TokenType.INTO) 1717 buckets = self._parse_number() 1718 self._match_text_seq("BUCKETS") 1719 1720 return self.expression( 1721 exp.ClusteredByProperty, 1722 expressions=expressions, 1723 sorted_by=sorted_by, 1724 buckets=buckets, 1725 ) 1726 1727 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1728 if not self._match_text_seq("GRANTS"): 1729 
self._retreat(self._index - 1) 1730 return None 1731 1732 return self.expression(exp.CopyGrantsProperty) 1733 1734 def _parse_freespace(self) -> exp.FreespaceProperty: 1735 self._match(TokenType.EQ) 1736 return self.expression( 1737 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1738 ) 1739 1740 def _parse_mergeblockratio( 1741 self, no: bool = False, default: bool = False 1742 ) -> exp.MergeBlockRatioProperty: 1743 if self._match(TokenType.EQ): 1744 return self.expression( 1745 exp.MergeBlockRatioProperty, 1746 this=self._parse_number(), 1747 percent=self._match(TokenType.PERCENT), 1748 ) 1749 1750 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1751 1752 def _parse_datablocksize( 1753 self, 1754 default: t.Optional[bool] = None, 1755 minimum: t.Optional[bool] = None, 1756 maximum: t.Optional[bool] = None, 1757 ) -> exp.DataBlocksizeProperty: 1758 self._match(TokenType.EQ) 1759 size = self._parse_number() 1760 1761 units = None 1762 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1763 units = self._prev.text 1764 1765 return self.expression( 1766 exp.DataBlocksizeProperty, 1767 size=size, 1768 units=units, 1769 default=default, 1770 minimum=minimum, 1771 maximum=maximum, 1772 ) 1773 1774 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1775 self._match(TokenType.EQ) 1776 always = self._match_text_seq("ALWAYS") 1777 manual = self._match_text_seq("MANUAL") 1778 never = self._match_text_seq("NEVER") 1779 default = self._match_text_seq("DEFAULT") 1780 1781 autotemp = None 1782 if self._match_text_seq("AUTOTEMP"): 1783 autotemp = self._parse_schema() 1784 1785 return self.expression( 1786 exp.BlockCompressionProperty, 1787 always=always, 1788 manual=manual, 1789 never=never, 1790 default=default, 1791 autotemp=autotemp, 1792 ) 1793 1794 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1795 no = self._match_text_seq("NO") 1796 concurrent = 
self._match_text_seq("CONCURRENT") 1797 self._match_text_seq("ISOLATED", "LOADING") 1798 for_all = self._match_text_seq("FOR", "ALL") 1799 for_insert = self._match_text_seq("FOR", "INSERT") 1800 for_none = self._match_text_seq("FOR", "NONE") 1801 return self.expression( 1802 exp.IsolatedLoadingProperty, 1803 no=no, 1804 concurrent=concurrent, 1805 for_all=for_all, 1806 for_insert=for_insert, 1807 for_none=for_none, 1808 ) 1809 1810 def _parse_locking(self) -> exp.LockingProperty: 1811 if self._match(TokenType.TABLE): 1812 kind = "TABLE" 1813 elif self._match(TokenType.VIEW): 1814 kind = "VIEW" 1815 elif self._match(TokenType.ROW): 1816 kind = "ROW" 1817 elif self._match_text_seq("DATABASE"): 1818 kind = "DATABASE" 1819 else: 1820 kind = None 1821 1822 if kind in ("DATABASE", "TABLE", "VIEW"): 1823 this = self._parse_table_parts() 1824 else: 1825 this = None 1826 1827 if self._match(TokenType.FOR): 1828 for_or_in = "FOR" 1829 elif self._match(TokenType.IN): 1830 for_or_in = "IN" 1831 else: 1832 for_or_in = None 1833 1834 if self._match_text_seq("ACCESS"): 1835 lock_type = "ACCESS" 1836 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1837 lock_type = "EXCLUSIVE" 1838 elif self._match_text_seq("SHARE"): 1839 lock_type = "SHARE" 1840 elif self._match_text_seq("READ"): 1841 lock_type = "READ" 1842 elif self._match_text_seq("WRITE"): 1843 lock_type = "WRITE" 1844 elif self._match_text_seq("CHECKSUM"): 1845 lock_type = "CHECKSUM" 1846 else: 1847 lock_type = None 1848 1849 override = self._match_text_seq("OVERRIDE") 1850 1851 return self.expression( 1852 exp.LockingProperty, 1853 this=this, 1854 kind=kind, 1855 for_or_in=for_or_in, 1856 lock_type=lock_type, 1857 override=override, 1858 ) 1859 1860 def _parse_partition_by(self) -> t.List[exp.Expression]: 1861 if self._match(TokenType.PARTITION_BY): 1862 return self._parse_csv(self._parse_conjunction) 1863 return [] 1864 1865 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1866 def 
_parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1867 if self._match_text_seq("MINVALUE"): 1868 return exp.var("MINVALUE") 1869 if self._match_text_seq("MAXVALUE"): 1870 return exp.var("MAXVALUE") 1871 return self._parse_bitwise() 1872 1873 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1874 expression = None 1875 from_expressions = None 1876 to_expressions = None 1877 1878 if self._match(TokenType.IN): 1879 this = self._parse_wrapped_csv(self._parse_bitwise) 1880 elif self._match(TokenType.FROM): 1881 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1882 self._match_text_seq("TO") 1883 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1884 elif self._match_text_seq("WITH", "(", "MODULUS"): 1885 this = self._parse_number() 1886 self._match_text_seq(",", "REMAINDER") 1887 expression = self._parse_number() 1888 self._match_r_paren() 1889 else: 1890 self.raise_error("Failed to parse partition bound spec.") 1891 1892 return self.expression( 1893 exp.PartitionBoundSpec, 1894 this=this, 1895 expression=expression, 1896 from_expressions=from_expressions, 1897 to_expressions=to_expressions, 1898 ) 1899 1900 # https://www.postgresql.org/docs/current/sql-createtable.html 1901 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1902 if not self._match_text_seq("OF"): 1903 self._retreat(self._index - 1) 1904 return None 1905 1906 this = self._parse_table(schema=True) 1907 1908 if self._match(TokenType.DEFAULT): 1909 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1910 elif self._match_text_seq("FOR", "VALUES"): 1911 expression = self._parse_partition_bound_spec() 1912 else: 1913 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1914 1915 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1916 1917 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1918 self._match(TokenType.EQ) 1919 return self.expression( 
1920 exp.PartitionedByProperty, 1921 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1922 ) 1923 1924 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1925 if self._match_text_seq("AND", "STATISTICS"): 1926 statistics = True 1927 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1928 statistics = False 1929 else: 1930 statistics = None 1931 1932 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1933 1934 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1935 if self._match_text_seq("SQL"): 1936 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1937 return None 1938 1939 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1940 if self._match_text_seq("SQL", "DATA"): 1941 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1942 return None 1943 1944 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1945 if self._match_text_seq("PRIMARY", "INDEX"): 1946 return exp.NoPrimaryIndexProperty() 1947 if self._match_text_seq("SQL"): 1948 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1949 return None 1950 1951 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1952 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1953 return exp.OnCommitProperty() 1954 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1955 return exp.OnCommitProperty(delete=True) 1956 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1957 1958 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1959 if self._match_text_seq("SQL", "DATA"): 1960 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1961 return None 1962 1963 def _parse_distkey(self) -> exp.DistKeyProperty: 1964 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1965 1966 def _parse_create_like(self) -> 
t.Optional[exp.LikeProperty]: 1967 table = self._parse_table(schema=True) 1968 1969 options = [] 1970 while self._match_texts(("INCLUDING", "EXCLUDING")): 1971 this = self._prev.text.upper() 1972 1973 id_var = self._parse_id_var() 1974 if not id_var: 1975 return None 1976 1977 options.append( 1978 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1979 ) 1980 1981 return self.expression(exp.LikeProperty, this=table, expressions=options) 1982 1983 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1984 return self.expression( 1985 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1986 ) 1987 1988 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1989 self._match(TokenType.EQ) 1990 return self.expression( 1991 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1992 ) 1993 1994 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1995 self._match_text_seq("WITH", "CONNECTION") 1996 return self.expression( 1997 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1998 ) 1999 2000 def _parse_returns(self) -> exp.ReturnsProperty: 2001 value: t.Optional[exp.Expression] 2002 is_table = self._match(TokenType.TABLE) 2003 2004 if is_table: 2005 if self._match(TokenType.LT): 2006 value = self.expression( 2007 exp.Schema, 2008 this="TABLE", 2009 expressions=self._parse_csv(self._parse_struct_types), 2010 ) 2011 if not self._match(TokenType.GT): 2012 self.raise_error("Expecting >") 2013 else: 2014 value = self._parse_schema(exp.var("TABLE")) 2015 else: 2016 value = self._parse_types() 2017 2018 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2019 2020 def _parse_describe(self) -> exp.Describe: 2021 kind = self._match_set(self.CREATABLES) and self._prev.text 2022 extended = self._match_text_seq("EXTENDED") 2023 this = self._parse_table(schema=True) 2024 properties = 
self._parse_properties() 2025 expressions = properties.expressions if properties else None 2026 return self.expression( 2027 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2028 ) 2029 2030 def _parse_insert(self) -> exp.Insert: 2031 comments = ensure_list(self._prev_comments) 2032 overwrite = self._match(TokenType.OVERWRITE) 2033 ignore = self._match(TokenType.IGNORE) 2034 local = self._match_text_seq("LOCAL") 2035 alternative = None 2036 2037 if self._match_text_seq("DIRECTORY"): 2038 this: t.Optional[exp.Expression] = self.expression( 2039 exp.Directory, 2040 this=self._parse_var_or_string(), 2041 local=local, 2042 row_format=self._parse_row_format(match_row=True), 2043 ) 2044 else: 2045 if self._match(TokenType.OR): 2046 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2047 2048 self._match(TokenType.INTO) 2049 comments += ensure_list(self._prev_comments) 2050 self._match(TokenType.TABLE) 2051 this = self._parse_table(schema=True) 2052 2053 returning = self._parse_returning() 2054 2055 return self.expression( 2056 exp.Insert, 2057 comments=comments, 2058 this=this, 2059 by_name=self._match_text_seq("BY", "NAME"), 2060 exists=self._parse_exists(), 2061 partition=self._parse_partition(), 2062 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2063 and self._parse_conjunction(), 2064 expression=self._parse_ddl_select(), 2065 conflict=self._parse_on_conflict(), 2066 returning=returning or self._parse_returning(), 2067 overwrite=overwrite, 2068 alternative=alternative, 2069 ignore=ignore, 2070 ) 2071 2072 def _parse_kill(self) -> exp.Kill: 2073 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2074 2075 return self.expression( 2076 exp.Kill, 2077 this=self._parse_primary(), 2078 kind=kind, 2079 ) 2080 2081 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2082 conflict = self._match_text_seq("ON", "CONFLICT") 2083 duplicate = self._match_text_seq("ON", 
"DUPLICATE", "KEY") 2084 2085 if not conflict and not duplicate: 2086 return None 2087 2088 nothing = None 2089 expressions = None 2090 key = None 2091 constraint = None 2092 2093 if conflict: 2094 if self._match_text_seq("ON", "CONSTRAINT"): 2095 constraint = self._parse_id_var() 2096 else: 2097 key = self._parse_csv(self._parse_value) 2098 2099 self._match_text_seq("DO") 2100 if self._match_text_seq("NOTHING"): 2101 nothing = True 2102 else: 2103 self._match(TokenType.UPDATE) 2104 self._match(TokenType.SET) 2105 expressions = self._parse_csv(self._parse_equality) 2106 2107 return self.expression( 2108 exp.OnConflict, 2109 duplicate=duplicate, 2110 expressions=expressions, 2111 nothing=nothing, 2112 key=key, 2113 constraint=constraint, 2114 ) 2115 2116 def _parse_returning(self) -> t.Optional[exp.Returning]: 2117 if not self._match(TokenType.RETURNING): 2118 return None 2119 return self.expression( 2120 exp.Returning, 2121 expressions=self._parse_csv(self._parse_expression), 2122 into=self._match(TokenType.INTO) and self._parse_table_part(), 2123 ) 2124 2125 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2126 if not self._match(TokenType.FORMAT): 2127 return None 2128 return self._parse_row_format() 2129 2130 def _parse_row_format( 2131 self, match_row: bool = False 2132 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2133 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2134 return None 2135 2136 if self._match_text_seq("SERDE"): 2137 this = self._parse_string() 2138 2139 serde_properties = None 2140 if self._match(TokenType.SERDE_PROPERTIES): 2141 serde_properties = self.expression( 2142 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2143 ) 2144 2145 return self.expression( 2146 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2147 ) 2148 2149 self._match_text_seq("DELIMITED") 2150 2151 kwargs = {} 2152 
2153 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2154 kwargs["fields"] = self._parse_string() 2155 if self._match_text_seq("ESCAPED", "BY"): 2156 kwargs["escaped"] = self._parse_string() 2157 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2158 kwargs["collection_items"] = self._parse_string() 2159 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2160 kwargs["map_keys"] = self._parse_string() 2161 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2162 kwargs["lines"] = self._parse_string() 2163 if self._match_text_seq("NULL", "DEFINED", "AS"): 2164 kwargs["null"] = self._parse_string() 2165 2166 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2167 2168 def _parse_load(self) -> exp.LoadData | exp.Command: 2169 if self._match_text_seq("DATA"): 2170 local = self._match_text_seq("LOCAL") 2171 self._match_text_seq("INPATH") 2172 inpath = self._parse_string() 2173 overwrite = self._match(TokenType.OVERWRITE) 2174 self._match_pair(TokenType.INTO, TokenType.TABLE) 2175 2176 return self.expression( 2177 exp.LoadData, 2178 this=self._parse_table(schema=True), 2179 local=local, 2180 overwrite=overwrite, 2181 inpath=inpath, 2182 partition=self._parse_partition(), 2183 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2184 serde=self._match_text_seq("SERDE") and self._parse_string(), 2185 ) 2186 return self._parse_as_command(self._prev) 2187 2188 def _parse_delete(self) -> exp.Delete: 2189 # This handles MySQL's "Multiple-Table Syntax" 2190 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2191 tables = None 2192 comments = self._prev_comments 2193 if not self._match(TokenType.FROM, advance=False): 2194 tables = self._parse_csv(self._parse_table) or None 2195 2196 returning = self._parse_returning() 2197 2198 return self.expression( 2199 exp.Delete, 2200 comments=comments, 2201 tables=tables, 2202 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2203 
using=self._match(TokenType.USING) and self._parse_table(joins=True), 2204 where=self._parse_where(), 2205 returning=returning or self._parse_returning(), 2206 limit=self._parse_limit(), 2207 ) 2208 2209 def _parse_update(self) -> exp.Update: 2210 comments = self._prev_comments 2211 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2212 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2213 returning = self._parse_returning() 2214 return self.expression( 2215 exp.Update, 2216 comments=comments, 2217 **{ # type: ignore 2218 "this": this, 2219 "expressions": expressions, 2220 "from": self._parse_from(joins=True), 2221 "where": self._parse_where(), 2222 "returning": returning or self._parse_returning(), 2223 "order": self._parse_order(), 2224 "limit": self._parse_limit(), 2225 }, 2226 ) 2227 2228 def _parse_uncache(self) -> exp.Uncache: 2229 if not self._match(TokenType.TABLE): 2230 self.raise_error("Expecting TABLE after UNCACHE") 2231 2232 return self.expression( 2233 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2234 ) 2235 2236 def _parse_cache(self) -> exp.Cache: 2237 lazy = self._match_text_seq("LAZY") 2238 self._match(TokenType.TABLE) 2239 table = self._parse_table(schema=True) 2240 2241 options = [] 2242 if self._match_text_seq("OPTIONS"): 2243 self._match_l_paren() 2244 k = self._parse_string() 2245 self._match(TokenType.EQ) 2246 v = self._parse_string() 2247 options = [k, v] 2248 self._match_r_paren() 2249 2250 self._match(TokenType.ALIAS) 2251 return self.expression( 2252 exp.Cache, 2253 this=table, 2254 lazy=lazy, 2255 options=options, 2256 expression=self._parse_select(nested=True), 2257 ) 2258 2259 def _parse_partition(self) -> t.Optional[exp.Partition]: 2260 if not self._match(TokenType.PARTITION): 2261 return None 2262 2263 return self.expression( 2264 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2265 ) 2266 2267 def 
_parse_value(self) -> exp.Tuple: 2268 if self._match(TokenType.L_PAREN): 2269 expressions = self._parse_csv(self._parse_expression) 2270 self._match_r_paren() 2271 return self.expression(exp.Tuple, expressions=expressions) 2272 2273 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2274 # https://prestodb.io/docs/current/sql/values.html 2275 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2276 2277 def _parse_projections(self) -> t.List[exp.Expression]: 2278 return self._parse_expressions() 2279 2280 def _parse_select( 2281 self, 2282 nested: bool = False, 2283 table: bool = False, 2284 parse_subquery_alias: bool = True, 2285 parse_set_operation: bool = True, 2286 ) -> t.Optional[exp.Expression]: 2287 cte = self._parse_with() 2288 2289 if cte: 2290 this = self._parse_statement() 2291 2292 if not this: 2293 self.raise_error("Failed to parse any statement following CTE") 2294 return cte 2295 2296 if "with" in this.arg_types: 2297 this.set("with", cte) 2298 else: 2299 self.raise_error(f"{this.key} does not support CTE") 2300 this = cte 2301 2302 return this 2303 2304 # duckdb supports leading with FROM x 2305 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2306 2307 if self._match(TokenType.SELECT): 2308 comments = self._prev_comments 2309 2310 hint = self._parse_hint() 2311 all_ = self._match(TokenType.ALL) 2312 distinct = self._match_set(self.DISTINCT_TOKENS) 2313 2314 kind = ( 2315 self._match(TokenType.ALIAS) 2316 and self._match_texts(("STRUCT", "VALUE")) 2317 and self._prev.text.upper() 2318 ) 2319 2320 if distinct: 2321 distinct = self.expression( 2322 exp.Distinct, 2323 on=self._parse_value() if self._match(TokenType.ON) else None, 2324 ) 2325 2326 if all_ and distinct: 2327 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2328 2329 limit = self._parse_limit(top=True) 2330 projections = self._parse_projections() 2331 2332 this = self.expression( 2333 
exp.Select, 2334 kind=kind, 2335 hint=hint, 2336 distinct=distinct, 2337 expressions=projections, 2338 limit=limit, 2339 ) 2340 this.comments = comments 2341 2342 into = self._parse_into() 2343 if into: 2344 this.set("into", into) 2345 2346 if not from_: 2347 from_ = self._parse_from() 2348 2349 if from_: 2350 this.set("from", from_) 2351 2352 this = self._parse_query_modifiers(this) 2353 elif (table or nested) and self._match(TokenType.L_PAREN): 2354 if self._match(TokenType.PIVOT): 2355 this = self._parse_simplified_pivot() 2356 elif self._match(TokenType.FROM): 2357 this = exp.select("*").from_( 2358 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2359 ) 2360 else: 2361 this = ( 2362 self._parse_table() 2363 if table 2364 else self._parse_select(nested=True, parse_set_operation=False) 2365 ) 2366 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2367 2368 self._match_r_paren() 2369 2370 # We return early here so that the UNION isn't attached to the subquery by the 2371 # following call to _parse_set_operations, but instead becomes the parent node 2372 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2373 elif self._match(TokenType.VALUES): 2374 this = self.expression( 2375 exp.Values, 2376 expressions=self._parse_csv(self._parse_value), 2377 alias=self._parse_table_alias(), 2378 ) 2379 elif from_: 2380 this = exp.select("*").from_(from_.this, copy=False) 2381 else: 2382 this = None 2383 2384 if parse_set_operation: 2385 return self._parse_set_operations(this) 2386 return this 2387 2388 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2389 if not skip_with_token and not self._match(TokenType.WITH): 2390 return None 2391 2392 comments = self._prev_comments 2393 recursive = self._match(TokenType.RECURSIVE) 2394 2395 expressions = [] 2396 while True: 2397 expressions.append(self._parse_cte()) 2398 2399 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2400 break 2401 
else: 2402 self._match(TokenType.WITH) 2403 2404 return self.expression( 2405 exp.With, comments=comments, expressions=expressions, recursive=recursive 2406 ) 2407 2408 def _parse_cte(self) -> exp.CTE: 2409 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2410 if not alias or not alias.this: 2411 self.raise_error("Expected CTE to have alias") 2412 2413 self._match(TokenType.ALIAS) 2414 return self.expression( 2415 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2416 ) 2417 2418 def _parse_table_alias( 2419 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2420 ) -> t.Optional[exp.TableAlias]: 2421 any_token = self._match(TokenType.ALIAS) 2422 alias = ( 2423 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2424 or self._parse_string_as_identifier() 2425 ) 2426 2427 index = self._index 2428 if self._match(TokenType.L_PAREN): 2429 columns = self._parse_csv(self._parse_function_parameter) 2430 self._match_r_paren() if columns else self._retreat(index) 2431 else: 2432 columns = None 2433 2434 if not alias and not columns: 2435 return None 2436 2437 return self.expression(exp.TableAlias, this=alias, columns=columns) 2438 2439 def _parse_subquery( 2440 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2441 ) -> t.Optional[exp.Subquery]: 2442 if not this: 2443 return None 2444 2445 return self.expression( 2446 exp.Subquery, 2447 this=this, 2448 pivots=self._parse_pivots(), 2449 alias=self._parse_table_alias() if parse_alias else None, 2450 ) 2451 2452 def _parse_query_modifiers( 2453 self, this: t.Optional[exp.Expression] 2454 ) -> t.Optional[exp.Expression]: 2455 if isinstance(this, self.MODIFIABLES): 2456 for join in iter(self._parse_join, None): 2457 this.append("joins", join) 2458 for lateral in iter(self._parse_lateral, None): 2459 this.append("laterals", lateral) 2460 2461 while True: 2462 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2463 parser = 
self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2464 key, expression = parser(self) 2465 2466 if expression: 2467 this.set(key, expression) 2468 if key == "limit": 2469 offset = expression.args.pop("offset", None) 2470 2471 if offset: 2472 offset = exp.Offset(expression=offset) 2473 this.set("offset", offset) 2474 2475 limit_by_expressions = expression.expressions 2476 expression.set("expressions", None) 2477 offset.set("expressions", limit_by_expressions) 2478 continue 2479 break 2480 return this 2481 2482 def _parse_hint(self) -> t.Optional[exp.Hint]: 2483 if self._match(TokenType.HINT): 2484 hints = [] 2485 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2486 hints.extend(hint) 2487 2488 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2489 self.raise_error("Expected */ after HINT") 2490 2491 return self.expression(exp.Hint, expressions=hints) 2492 2493 return None 2494 2495 def _parse_into(self) -> t.Optional[exp.Into]: 2496 if not self._match(TokenType.INTO): 2497 return None 2498 2499 temp = self._match(TokenType.TEMPORARY) 2500 unlogged = self._match_text_seq("UNLOGGED") 2501 self._match(TokenType.TABLE) 2502 2503 return self.expression( 2504 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2505 ) 2506 2507 def _parse_from( 2508 self, joins: bool = False, skip_from_token: bool = False 2509 ) -> t.Optional[exp.From]: 2510 if not skip_from_token and not self._match(TokenType.FROM): 2511 return None 2512 2513 return self.expression( 2514 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2515 ) 2516 2517 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2518 if not self._match(TokenType.MATCH_RECOGNIZE): 2519 return None 2520 2521 self._match_l_paren() 2522 2523 partition = self._parse_partition_by() 2524 order = self._parse_order() 2525 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2526 2527 if 
self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2528 rows = exp.var("ONE ROW PER MATCH") 2529 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2530 text = "ALL ROWS PER MATCH" 2531 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2532 text += " SHOW EMPTY MATCHES" 2533 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2534 text += " OMIT EMPTY MATCHES" 2535 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2536 text += " WITH UNMATCHED ROWS" 2537 rows = exp.var(text) 2538 else: 2539 rows = None 2540 2541 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2542 text = "AFTER MATCH SKIP" 2543 if self._match_text_seq("PAST", "LAST", "ROW"): 2544 text += " PAST LAST ROW" 2545 elif self._match_text_seq("TO", "NEXT", "ROW"): 2546 text += " TO NEXT ROW" 2547 elif self._match_text_seq("TO", "FIRST"): 2548 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2549 elif self._match_text_seq("TO", "LAST"): 2550 text += f" TO LAST {self._advance_any().text}" # type: ignore 2551 after = exp.var(text) 2552 else: 2553 after = None 2554 2555 if self._match_text_seq("PATTERN"): 2556 self._match_l_paren() 2557 2558 if not self._curr: 2559 self.raise_error("Expecting )", self._curr) 2560 2561 paren = 1 2562 start = self._curr 2563 2564 while self._curr and paren > 0: 2565 if self._curr.token_type == TokenType.L_PAREN: 2566 paren += 1 2567 if self._curr.token_type == TokenType.R_PAREN: 2568 paren -= 1 2569 2570 end = self._prev 2571 self._advance() 2572 2573 if paren > 0: 2574 self.raise_error("Expecting )", self._curr) 2575 2576 pattern = exp.var(self._find_sql(start, end)) 2577 else: 2578 pattern = None 2579 2580 define = ( 2581 self._parse_csv(self._parse_name_as_expression) 2582 if self._match_text_seq("DEFINE") 2583 else None 2584 ) 2585 2586 self._match_r_paren() 2587 2588 return self.expression( 2589 exp.MatchRecognize, 2590 partition_by=partition, 2591 order=order, 2592 measures=measures, 2593 rows=rows, 2594 after=after, 2595 
pattern=pattern, 2596 define=define, 2597 alias=self._parse_table_alias(), 2598 ) 2599 2600 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2601 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2602 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2603 cross_apply = False 2604 2605 if cross_apply is not None: 2606 this = self._parse_select(table=True) 2607 view = None 2608 outer = None 2609 elif self._match(TokenType.LATERAL): 2610 this = self._parse_select(table=True) 2611 view = self._match(TokenType.VIEW) 2612 outer = self._match(TokenType.OUTER) 2613 else: 2614 return None 2615 2616 if not this: 2617 this = ( 2618 self._parse_unnest() 2619 or self._parse_function() 2620 or self._parse_id_var(any_token=False) 2621 ) 2622 2623 while self._match(TokenType.DOT): 2624 this = exp.Dot( 2625 this=this, 2626 expression=self._parse_function() or self._parse_id_var(any_token=False), 2627 ) 2628 2629 if view: 2630 table = self._parse_id_var(any_token=False) 2631 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2632 table_alias: t.Optional[exp.TableAlias] = self.expression( 2633 exp.TableAlias, this=table, columns=columns 2634 ) 2635 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2636 # We move the alias from the lateral's child node to the lateral itself 2637 table_alias = this.args["alias"].pop() 2638 else: 2639 table_alias = self._parse_table_alias() 2640 2641 return self.expression( 2642 exp.Lateral, 2643 this=this, 2644 view=view, 2645 outer=outer, 2646 alias=table_alias, 2647 cross_apply=cross_apply, 2648 ) 2649 2650 def _parse_join_parts( 2651 self, 2652 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2653 return ( 2654 self._match_set(self.JOIN_METHODS) and self._prev, 2655 self._match_set(self.JOIN_SIDES) and self._prev, 2656 self._match_set(self.JOIN_KINDS) and self._prev, 2657 ) 2658 2659 def _parse_join( 2660 self, skip_join_token: bool = 
False, parse_bracket: bool = False 2661 ) -> t.Optional[exp.Join]: 2662 if self._match(TokenType.COMMA): 2663 return self.expression(exp.Join, this=self._parse_table()) 2664 2665 index = self._index 2666 method, side, kind = self._parse_join_parts() 2667 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2668 join = self._match(TokenType.JOIN) 2669 2670 if not skip_join_token and not join: 2671 self._retreat(index) 2672 kind = None 2673 method = None 2674 side = None 2675 2676 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2677 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2678 2679 if not skip_join_token and not join and not outer_apply and not cross_apply: 2680 return None 2681 2682 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2683 2684 if method: 2685 kwargs["method"] = method.text 2686 if side: 2687 kwargs["side"] = side.text 2688 if kind: 2689 kwargs["kind"] = kind.text 2690 if hint: 2691 kwargs["hint"] = hint 2692 2693 if self._match(TokenType.ON): 2694 kwargs["on"] = self._parse_conjunction() 2695 elif self._match(TokenType.USING): 2696 kwargs["using"] = self._parse_wrapped_id_vars() 2697 elif not (kind and kind.token_type == TokenType.CROSS): 2698 index = self._index 2699 join = self._parse_join() 2700 2701 if join and self._match(TokenType.ON): 2702 kwargs["on"] = self._parse_conjunction() 2703 elif join and self._match(TokenType.USING): 2704 kwargs["using"] = self._parse_wrapped_id_vars() 2705 else: 2706 join = None 2707 self._retreat(index) 2708 2709 kwargs["this"].set("joins", [join] if join else None) 2710 2711 comments = [c for token in (method, side, kind) if token for c in token.comments] 2712 return self.expression(exp.Join, comments=comments, **kwargs) 2713 2714 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2715 this = self._parse_conjunction() 2716 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2717 return 
this 2718 2719 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2720 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2721 2722 return this 2723 2724 def _parse_index( 2725 self, 2726 index: t.Optional[exp.Expression] = None, 2727 ) -> t.Optional[exp.Index]: 2728 if index: 2729 unique = None 2730 primary = None 2731 amp = None 2732 2733 self._match(TokenType.ON) 2734 self._match(TokenType.TABLE) # hive 2735 table = self._parse_table_parts(schema=True) 2736 else: 2737 unique = self._match(TokenType.UNIQUE) 2738 primary = self._match_text_seq("PRIMARY") 2739 amp = self._match_text_seq("AMP") 2740 2741 if not self._match(TokenType.INDEX): 2742 return None 2743 2744 index = self._parse_id_var() 2745 table = None 2746 2747 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2748 2749 if self._match(TokenType.L_PAREN, advance=False): 2750 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2751 else: 2752 columns = None 2753 2754 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2755 2756 return self.expression( 2757 exp.Index, 2758 this=index, 2759 table=table, 2760 using=using, 2761 columns=columns, 2762 unique=unique, 2763 primary=primary, 2764 amp=amp, 2765 include=include, 2766 partition_by=self._parse_partition_by(), 2767 where=self._parse_where(), 2768 ) 2769 2770 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2771 hints: t.List[exp.Expression] = [] 2772 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2773 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2774 hints.append( 2775 self.expression( 2776 exp.WithTableHint, 2777 expressions=self._parse_csv( 2778 lambda: self._parse_function() or self._parse_var(any_token=True) 2779 ), 2780 ) 2781 ) 2782 self._match_r_paren() 2783 else: 2784 # 
https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2785 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2786 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2787 2788 self._match_texts(("INDEX", "KEY")) 2789 if self._match(TokenType.FOR): 2790 hint.set("target", self._advance_any() and self._prev.text.upper()) 2791 2792 hint.set("expressions", self._parse_wrapped_id_vars()) 2793 hints.append(hint) 2794 2795 return hints or None 2796 2797 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2798 return ( 2799 (not schema and self._parse_function(optional_parens=False)) 2800 or self._parse_id_var(any_token=False) 2801 or self._parse_string_as_identifier() 2802 or self._parse_placeholder() 2803 ) 2804 2805 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2806 catalog = None 2807 db = None 2808 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2809 2810 while self._match(TokenType.DOT): 2811 if catalog: 2812 # This allows nesting the table in arbitrarily many dot expressions if needed 2813 table = self.expression( 2814 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2815 ) 2816 else: 2817 catalog = db 2818 db = table 2819 table = self._parse_table_part(schema=schema) or "" 2820 2821 if is_db_reference: 2822 catalog = db 2823 db = table 2824 table = None 2825 2826 if not table and not is_db_reference: 2827 self.raise_error(f"Expected table name but got {self._curr}") 2828 if not db and is_db_reference: 2829 self.raise_error(f"Expected database name but got {self._curr}") 2830 2831 return self.expression( 2832 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2833 ) 2834 2835 def _parse_table( 2836 self, 2837 schema: bool = False, 2838 joins: bool = False, 2839 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2840 parse_bracket: bool = False, 2841 is_db_reference: bool = False, 2842 ) -> 
t.Optional[exp.Expression]: 2843 lateral = self._parse_lateral() 2844 if lateral: 2845 return lateral 2846 2847 unnest = self._parse_unnest() 2848 if unnest: 2849 return unnest 2850 2851 values = self._parse_derived_table_values() 2852 if values: 2853 return values 2854 2855 subquery = self._parse_select(table=True) 2856 if subquery: 2857 if not subquery.args.get("pivots"): 2858 subquery.set("pivots", self._parse_pivots()) 2859 return subquery 2860 2861 bracket = parse_bracket and self._parse_bracket(None) 2862 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2863 this = t.cast( 2864 exp.Expression, 2865 bracket 2866 or self._parse_bracket( 2867 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2868 ), 2869 ) 2870 2871 if schema: 2872 return self._parse_schema(this=this) 2873 2874 version = self._parse_version() 2875 2876 if version: 2877 this.set("version", version) 2878 2879 if self.dialect.ALIAS_POST_TABLESAMPLE: 2880 table_sample = self._parse_table_sample() 2881 2882 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2883 if alias: 2884 this.set("alias", alias) 2885 2886 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2887 return self.expression( 2888 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2889 ) 2890 2891 this.set("hints", self._parse_table_hints()) 2892 2893 if not this.args.get("pivots"): 2894 this.set("pivots", self._parse_pivots()) 2895 2896 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2897 table_sample = self._parse_table_sample() 2898 2899 if table_sample: 2900 table_sample.set("this", this) 2901 this = table_sample 2902 2903 if joins: 2904 for join in iter(self._parse_join, None): 2905 this.append("joins", join) 2906 2907 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2908 this.set("ordinality", True) 2909 this.set("alias", self._parse_table_alias()) 2910 2911 return this 2912 2913 def _parse_version(self) -> 
t.Optional[exp.Version]: 2914 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2915 this = "TIMESTAMP" 2916 elif self._match(TokenType.VERSION_SNAPSHOT): 2917 this = "VERSION" 2918 else: 2919 return None 2920 2921 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2922 kind = self._prev.text.upper() 2923 start = self._parse_bitwise() 2924 self._match_texts(("TO", "AND")) 2925 end = self._parse_bitwise() 2926 expression: t.Optional[exp.Expression] = self.expression( 2927 exp.Tuple, expressions=[start, end] 2928 ) 2929 elif self._match_text_seq("CONTAINED", "IN"): 2930 kind = "CONTAINED IN" 2931 expression = self.expression( 2932 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2933 ) 2934 elif self._match(TokenType.ALL): 2935 kind = "ALL" 2936 expression = None 2937 else: 2938 self._match_text_seq("AS", "OF") 2939 kind = "AS OF" 2940 expression = self._parse_type() 2941 2942 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2943 2944 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2945 if not self._match(TokenType.UNNEST): 2946 return None 2947 2948 expressions = self._parse_wrapped_csv(self._parse_equality) 2949 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2950 2951 alias = self._parse_table_alias() if with_alias else None 2952 2953 if alias: 2954 if self.dialect.UNNEST_COLUMN_ONLY: 2955 if alias.args.get("columns"): 2956 self.raise_error("Unexpected extra column alias in unnest.") 2957 2958 alias.set("columns", [alias.this]) 2959 alias.set("this", None) 2960 2961 columns = alias.args.get("columns") or [] 2962 if offset and len(expressions) < len(columns): 2963 offset = columns.pop() 2964 2965 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2966 self._match(TokenType.ALIAS) 2967 offset = self._parse_id_var( 2968 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2969 ) or exp.to_identifier("offset") 2970 2971 return 
self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2972 2973 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2974 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2975 if not is_derived and not self._match(TokenType.VALUES): 2976 return None 2977 2978 expressions = self._parse_csv(self._parse_value) 2979 alias = self._parse_table_alias() 2980 2981 if is_derived: 2982 self._match_r_paren() 2983 2984 return self.expression( 2985 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2986 ) 2987 2988 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2989 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2990 as_modifier and self._match_text_seq("USING", "SAMPLE") 2991 ): 2992 return None 2993 2994 bucket_numerator = None 2995 bucket_denominator = None 2996 bucket_field = None 2997 percent = None 2998 size = None 2999 seed = None 3000 3001 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3002 matched_l_paren = self._match(TokenType.L_PAREN) 3003 3004 if self.TABLESAMPLE_CSV: 3005 num = None 3006 expressions = self._parse_csv(self._parse_primary) 3007 else: 3008 expressions = None 3009 num = ( 3010 self._parse_factor() 3011 if self._match(TokenType.NUMBER, advance=False) 3012 else self._parse_primary() or self._parse_placeholder() 3013 ) 3014 3015 if self._match_text_seq("BUCKET"): 3016 bucket_numerator = self._parse_number() 3017 self._match_text_seq("OUT", "OF") 3018 bucket_denominator = bucket_denominator = self._parse_number() 3019 self._match(TokenType.ON) 3020 bucket_field = self._parse_field() 3021 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3022 percent = num 3023 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3024 size = num 3025 else: 3026 percent = num 3027 3028 if matched_l_paren: 3029 self._match_r_paren() 3030 3031 if self._match(TokenType.L_PAREN): 3032 method = 
self._parse_var(upper=True) 3033 seed = self._match(TokenType.COMMA) and self._parse_number() 3034 self._match_r_paren() 3035 elif self._match_texts(("SEED", "REPEATABLE")): 3036 seed = self._parse_wrapped(self._parse_number) 3037 3038 return self.expression( 3039 exp.TableSample, 3040 expressions=expressions, 3041 method=method, 3042 bucket_numerator=bucket_numerator, 3043 bucket_denominator=bucket_denominator, 3044 bucket_field=bucket_field, 3045 percent=percent, 3046 size=size, 3047 seed=seed, 3048 ) 3049 3050 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3051 return list(iter(self._parse_pivot, None)) or None 3052 3053 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3054 return list(iter(self._parse_join, None)) or None 3055 3056 # https://duckdb.org/docs/sql/statements/pivot 3057 def _parse_simplified_pivot(self) -> exp.Pivot: 3058 def _parse_on() -> t.Optional[exp.Expression]: 3059 this = self._parse_bitwise() 3060 return self._parse_in(this) if self._match(TokenType.IN) else this 3061 3062 this = self._parse_table() 3063 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3064 using = self._match(TokenType.USING) and self._parse_csv( 3065 lambda: self._parse_alias(self._parse_function()) 3066 ) 3067 group = self._parse_group() 3068 return self.expression( 3069 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3070 ) 3071 3072 def _parse_pivot_in(self) -> exp.In: 3073 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3074 this = self._parse_conjunction() 3075 3076 self._match(TokenType.ALIAS) 3077 alias = self._parse_field() 3078 if alias: 3079 return self.expression(exp.PivotAlias, this=this, alias=alias) 3080 3081 return this 3082 3083 value = self._parse_column() 3084 3085 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3086 self.raise_error("Expecting IN (") 3087 3088 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3089 3090 self._match_r_paren() 3091 
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause.

        Returns None (after retreating to the starting token) when no
        PIVOT/UNPIVOT keyword or opening paren is found, so the caller can
        try other productions.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Keyword matched but no paren followed: not actually a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            # UNPIVOT lists plain columns; PIVOT lists (optionally aliased) aggregations.
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # An alias may follow, unless another PIVOT/UNPIVOT clause comes next.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the generated output column names: one per
            # (IN-field, aggregation-name) pair, joined per dialect settings.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default naming scheme for PIVOT output columns: the aggregation aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None if the keyword is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, (WITH) ROLLUP/CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            # GROUP BY ALL (e.g. DuckDB/Snowflake style) carries no expression list.
            return self.expression(exp.Group, all=True)

        # Loop: a single GROUP BY may interleave plain expressions with
        # GROUPING SETS / ROLLUP / CUBE / TOTALS modifiers.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; bare ROLLUP takes a wrapped list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # WITH matched but belonged to a later clause; give it back.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...); returns None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; returns None if the keyword is absent (unless skipped)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; returns None if the keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse hierarchical CONNECT BY / START WITH (either keyword may come first)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only an operator inside CONNECT BY, so register a parser for
        # it temporarily and remove it as soon as the condition is consumed.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also appear after CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name AS expr` (identifier first), as used e.g. in INTERPOLATE."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) list (ClickHouse ORDER BY ... WITH FILL)."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY / ORDER SIBLINGS BY; returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic sort-clause parser (e.g. SORT BY / CLUSTER BY / DISTRIBUTE BY)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # No explicit NULLS ordering: infer it from the dialect's default so the
        # AST carries the effective ordering, not just the written one.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        # WITH FILL [FROM ...] [TO ...] [STEP ...] (ClickHouse extension).
        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH FIRST/NEXT; returns `this` unchanged if neither matches.

        When `top` is True, the T-SQL TOP [(expr)] form is parsed instead of LIMIT.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP takes either a bare number or a parenthesized expression.
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`: the first term was the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the BY <exprs> suffix of LIMIT/OFFSET (ClickHouse LIMIT ... BY)."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT operations into `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given explicitly.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            # Hoist trailing modifiers (e.g. ORDER BY/LIMIT) off the right-hand
            # SELECT so they attach to the whole union instead.
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level expressions (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level expressions (<, <=, >, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE..., ISNULL/NOTNULL, IS."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM expr | NULL | TRUE | FALSE}.

        Retreats and returns None when IS is not followed by a recognized form,
        so the IS token can be reinterpreted by the caller.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of IN: UNNEST(...), (subquery|expr list), or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                # A single subquery becomes `query`, not a one-element list.
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of BETWEEN: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward INTERVAL '<n>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # INTERVAL here was actually something else (e.g. `interval IS ...`).
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, ||, ??, and << / >> (tokenized as LT LT / GT GT)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, annotating Div with dialect semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                # Record the dialect's division semantics for faithful transpilation.
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (only used if the dialect defines EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefixes, then a typed expression with optional AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse intervals, `TYPE 'literal'` casts, or fall through to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals
            while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect-specific literal parsers take priority.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was a misparse; rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` in DECIMAL(10) or `2 CHAR` forms."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type.

        Handles UDTs, STRUCT/ARRAY generics with `<...>`, ENUM values, aggregate
        state types, TIMESTAMP/TIME WITH TIME ZONE variants, INTERVAL spans,
        UNSIGNED integer variants and trailing `[]` array suffixes. Retreats and
        returns None when the tokens turn out not to form a type (e.g. when
        `check_func` is set and a string literal does not follow a callable-looking
        type such as DATE(...)).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Not a known type token: maybe an identifier that re-tokenizes to a
            # type, or a user-defined (possibly dotted) type name.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(name, types...): first arg is the function.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might actually be a function call — decided further down.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Generic syntax, e.g. ARRAY<INT> or STRUCT<a INT, b STRING>.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # Disambiguate TYPE(...) as a type vs a function call: a type here
            # must be followed by a string literal (peeked, then rewound).
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes wrap the type in ARRAYs, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member, e.g. `name INT` or `name: INT`; with
        `type_required`, rewind and re-parse as a bare type when no type was found."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # _parse_column_def added nothing, i.e. no type followed the name.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference followed by its operators (dots, casts, brackets)."""
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field, promoting a bare identifier to a Column node."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        return this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators: ::type casts, dotted paths, JSON ops, brackets."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dotted path shifts qualifiers: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse primaries: literals (with implicit string concatenation), `.5`-style
        numbers, and parenthesized expressions/subqueries/tuples."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: no-paren parsers, no-paren builtins, dialect-specific
        FUNCTION_PARSERS, subquery predicates (EXISTS/ANY/...), then known
        functions by name, finally falling back to exp.Anonymous.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some builder callables also accept the dialect; inspect and pass it.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a UDF definition (name plus optional type/constraints)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`), DISTINCT arg lists, or a plain
        select/expression argument, with optional trailing modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow followed; rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`.

        First speculatively tries a nested SELECT; if that parses, the parens were
        a subquery and `this` is returned unchanged (with the parser rewound).
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, computed/transform constraints,
        FOR ORDINALITY, and any number of trailing column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Untyped `name AS expr` — a computed column.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse AUTO REFRESH <value>; rewinds if REFRESH does not follow AUTO."""
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped value list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {ROW ... | IDENTITY (...) | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr) — a computed expression, not an identity spec.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form, e.g. IDENTITY(1, 1).
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE LENGTH <n> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return
self.expression(exp.NotForReplicationColumnConstraint) 4303 return None 4304 4305 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4306 if self._match(TokenType.CONSTRAINT): 4307 this = self._parse_id_var() 4308 else: 4309 this = None 4310 4311 if self._match_texts(self.CONSTRAINT_PARSERS): 4312 return self.expression( 4313 exp.ColumnConstraint, 4314 this=this, 4315 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4316 ) 4317 4318 return this 4319 4320 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4321 if not self._match(TokenType.CONSTRAINT): 4322 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4323 4324 this = self._parse_id_var() 4325 expressions = [] 4326 4327 while True: 4328 constraint = self._parse_unnamed_constraint() or self._parse_function() 4329 if not constraint: 4330 break 4331 expressions.append(constraint) 4332 4333 return self.expression(exp.Constraint, this=this, expressions=expressions) 4334 4335 def _parse_unnamed_constraint( 4336 self, constraints: t.Optional[t.Collection[str]] = None 4337 ) -> t.Optional[exp.Expression]: 4338 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4339 constraints or self.CONSTRAINT_PARSERS 4340 ): 4341 return None 4342 4343 constraint = self._prev.text.upper() 4344 if constraint not in self.CONSTRAINT_PARSERS: 4345 self.raise_error(f"No parser found for schema constraint {constraint}.") 4346 4347 return self.CONSTRAINT_PARSERS[constraint](self) 4348 4349 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4350 self._match_text_seq("KEY") 4351 return self.expression( 4352 exp.UniqueColumnConstraint, 4353 this=self._parse_schema(self._parse_id_var(any_token=False)), 4354 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4355 ) 4356 4357 def _parse_key_constraint_options(self) -> t.List[str]: 4358 options = [] 4359 while True: 4360 if not self._curr: 4361 break 4362 4363 if 
self._match(TokenType.ON): 4364 action = None 4365 on = self._advance_any() and self._prev.text 4366 4367 if self._match_text_seq("NO", "ACTION"): 4368 action = "NO ACTION" 4369 elif self._match_text_seq("CASCADE"): 4370 action = "CASCADE" 4371 elif self._match_text_seq("RESTRICT"): 4372 action = "RESTRICT" 4373 elif self._match_pair(TokenType.SET, TokenType.NULL): 4374 action = "SET NULL" 4375 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4376 action = "SET DEFAULT" 4377 else: 4378 self.raise_error("Invalid key constraint") 4379 4380 options.append(f"ON {on} {action}") 4381 elif self._match_text_seq("NOT", "ENFORCED"): 4382 options.append("NOT ENFORCED") 4383 elif self._match_text_seq("DEFERRABLE"): 4384 options.append("DEFERRABLE") 4385 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4386 options.append("INITIALLY DEFERRED") 4387 elif self._match_text_seq("NORELY"): 4388 options.append("NORELY") 4389 elif self._match_text_seq("MATCH", "FULL"): 4390 options.append("MATCH FULL") 4391 else: 4392 break 4393 4394 return options 4395 4396 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4397 if match and not self._match(TokenType.REFERENCES): 4398 return None 4399 4400 expressions = None 4401 this = self._parse_table(schema=True) 4402 options = self._parse_key_constraint_options() 4403 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4404 4405 def _parse_foreign_key(self) -> exp.ForeignKey: 4406 expressions = self._parse_wrapped_id_vars() 4407 reference = self._parse_references() 4408 options = {} 4409 4410 while self._match(TokenType.ON): 4411 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4412 self.raise_error("Expected DELETE or UPDATE") 4413 4414 kind = self._prev.text.lower() 4415 4416 if self._match_text_seq("NO", "ACTION"): 4417 action = "NO ACTION" 4418 elif self._match(TokenType.SET): 4419 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4420 action = "SET 
" + self._prev.text.upper() 4421 else: 4422 self._advance() 4423 action = self._prev.text.upper() 4424 4425 options[kind] = action 4426 4427 return self.expression( 4428 exp.ForeignKey, 4429 expressions=expressions, 4430 reference=reference, 4431 **options, # type: ignore 4432 ) 4433 4434 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4435 return self._parse_field() 4436 4437 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4438 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4439 self._retreat(self._index - 1) 4440 return None 4441 4442 id_vars = self._parse_wrapped_id_vars() 4443 return self.expression( 4444 exp.PeriodForSystemTimeConstraint, 4445 this=seq_get(id_vars, 0), 4446 expression=seq_get(id_vars, 1), 4447 ) 4448 4449 def _parse_primary_key( 4450 self, wrapped_optional: bool = False, in_props: bool = False 4451 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4452 desc = ( 4453 self._match_set((TokenType.ASC, TokenType.DESC)) 4454 and self._prev.token_type == TokenType.DESC 4455 ) 4456 4457 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4458 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4459 4460 expressions = self._parse_wrapped_csv( 4461 self._parse_primary_key_part, optional=wrapped_optional 4462 ) 4463 options = self._parse_key_constraint_options() 4464 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4465 4466 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4467 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4468 4469 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4470 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4471 return this 4472 4473 bracket_kind = self._prev.token_type 4474 expressions = self._parse_csv( 4475 lambda: 
self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4476 ) 4477 4478 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4479 self.raise_error("Expected ]") 4480 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4481 self.raise_error("Expected }") 4482 4483 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4484 if bracket_kind == TokenType.L_BRACE: 4485 this = self.expression(exp.Struct, expressions=expressions) 4486 elif not this or this.name.upper() == "ARRAY": 4487 this = self.expression(exp.Array, expressions=expressions) 4488 else: 4489 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4490 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4491 4492 self._add_comments(this) 4493 return self._parse_bracket(this) 4494 4495 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4496 if self._match(TokenType.COLON): 4497 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4498 return this 4499 4500 def _parse_case(self) -> t.Optional[exp.Expression]: 4501 ifs = [] 4502 default = None 4503 4504 comments = self._prev_comments 4505 expression = self._parse_conjunction() 4506 4507 while self._match(TokenType.WHEN): 4508 this = self._parse_conjunction() 4509 self._match(TokenType.THEN) 4510 then = self._parse_conjunction() 4511 ifs.append(self.expression(exp.If, this=this, true=then)) 4512 4513 if self._match(TokenType.ELSE): 4514 default = self._parse_conjunction() 4515 4516 if not self._match(TokenType.END): 4517 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4518 default = exp.column("interval") 4519 else: 4520 self.raise_error("Expected END after CASE", self._prev) 4521 4522 return self._parse_window( 4523 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4524 ) 4525 4526 def 
    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # Parses NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]. The NEXT
        # token was consumed by the caller, so backtrack one token on failure.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # Parses EXTRACT(<part> FROM <expr>); a comma is tolerated in place of
        # FROM for dialects that use the two-argument call form.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        # Parses CAST(<expr> AS <type> [FORMAT <fmt>]). `strict` selects Cast
        # vs TryCast; `safe` is threaded through to the resulting node.
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form CAST(x, 'type') -- the target is a string.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST ... FORMAT to a temporal type is really a string-to-time
                # conversion, so rewrite into StrToDate / StrToTime with the
                # format translated through the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # The target type parsed as a bare identifier: a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )
]] [LIMIT n]) 4633 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4634 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4635 4636 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4637 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4638 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4639 if not self._match_text_seq("WITHIN", "GROUP"): 4640 self._retreat(index) 4641 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4642 4643 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4644 order = self._parse_order(this=seq_get(args, 0)) 4645 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4646 4647 def _parse_convert( 4648 self, strict: bool, safe: t.Optional[bool] = None 4649 ) -> t.Optional[exp.Expression]: 4650 this = self._parse_bitwise() 4651 4652 if self._match(TokenType.USING): 4653 to: t.Optional[exp.Expression] = self.expression( 4654 exp.CharacterSet, this=self._parse_var() 4655 ) 4656 elif self._match(TokenType.COMMA): 4657 to = self._parse_types() 4658 else: 4659 to = None 4660 4661 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4662 4663 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4664 """ 4665 There are generally two variants of the DECODE function: 4666 4667 - DECODE(bin, charset) 4668 - DECODE(expression, search, result [, search, result] ... [, default]) 4669 4670 The second variant will always be parsed into a CASE expression. Note that NULL 4671 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4672 instead of relying on pattern matching. 
4673 """ 4674 args = self._parse_csv(self._parse_conjunction) 4675 4676 if len(args) < 3: 4677 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4678 4679 expression, *expressions = args 4680 if not expression: 4681 return None 4682 4683 ifs = [] 4684 for search, result in zip(expressions[::2], expressions[1::2]): 4685 if not search or not result: 4686 return None 4687 4688 if isinstance(search, exp.Literal): 4689 ifs.append( 4690 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4691 ) 4692 elif isinstance(search, exp.Null): 4693 ifs.append( 4694 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4695 ) 4696 else: 4697 cond = exp.or_( 4698 exp.EQ(this=expression.copy(), expression=search), 4699 exp.and_( 4700 exp.Is(this=expression.copy(), expression=exp.Null()), 4701 exp.Is(this=search.copy(), expression=exp.Null()), 4702 copy=False, 4703 ), 4704 copy=False, 4705 ) 4706 ifs.append(exp.If(this=cond, true=result)) 4707 4708 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4709 4710 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4711 self._match_text_seq("KEY") 4712 key = self._parse_column() 4713 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4714 self._match_text_seq("VALUE") 4715 value = self._parse_bitwise() 4716 4717 if not key and not value: 4718 return None 4719 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4720 4721 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4722 if not this or not self._match_text_seq("FORMAT", "JSON"): 4723 return this 4724 4725 return self.expression(exp.FormatJson, this=this) 4726 4727 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4728 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4729 for value in values: 4730 if self._match_text_seq(value, "ON", on): 4731 return f"{value} ON {on}" 4732 4733 return None 4734 4735 @t.overload 4736 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4737 ... 4738 4739 @t.overload 4740 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4741 ... 4742 4743 def _parse_json_object(self, agg=False): 4744 star = self._parse_star() 4745 expressions = ( 4746 [star] 4747 if star 4748 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4749 ) 4750 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4751 4752 unique_keys = None 4753 if self._match_text_seq("WITH", "UNIQUE"): 4754 unique_keys = True 4755 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4756 unique_keys = False 4757 4758 self._match_text_seq("KEYS") 4759 4760 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4761 self._parse_type() 4762 ) 4763 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4764 4765 return self.expression( 4766 exp.JSONObjectAgg if agg else exp.JSONObject, 4767 expressions=expressions, 4768 null_handling=null_handling, 4769 unique_keys=unique_keys, 4770 return_type=return_type, 4771 encoding=encoding, 4772 ) 4773 4774 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4775 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4776 if not self._match_text_seq("NESTED"): 4777 this = self._parse_id_var() 4778 kind = self._parse_types(allow_identifiers=False) 4779 nested = None 4780 else: 4781 this = None 4782 kind = None 4783 nested = True 4784 4785 path = self._match_text_seq("PATH") and self._parse_string() 4786 nested_schema = nested and self._parse_json_schema() 4787 4788 return self.expression( 4789 exp.JSONColumnDef, 4790 this=this, 4791 kind=kind, 4792 path=path, 4793 nested_schema=nested_schema, 4794 ) 4795 4796 def _parse_json_schema(self) -> 
exp.JSONSchema: 4797 self._match_text_seq("COLUMNS") 4798 return self.expression( 4799 exp.JSONSchema, 4800 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4801 ) 4802 4803 def _parse_json_table(self) -> exp.JSONTable: 4804 this = self._parse_format_json(self._parse_bitwise()) 4805 path = self._match(TokenType.COMMA) and self._parse_string() 4806 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4807 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4808 schema = self._parse_json_schema() 4809 4810 return exp.JSONTable( 4811 this=this, 4812 schema=schema, 4813 path=path, 4814 error_handling=error_handling, 4815 empty_handling=empty_handling, 4816 ) 4817 4818 def _parse_match_against(self) -> exp.MatchAgainst: 4819 expressions = self._parse_csv(self._parse_column) 4820 4821 self._match_text_seq(")", "AGAINST", "(") 4822 4823 this = self._parse_string() 4824 4825 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4826 modifier = "IN NATURAL LANGUAGE MODE" 4827 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4828 modifier = f"{modifier} WITH QUERY EXPANSION" 4829 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4830 modifier = "IN BOOLEAN MODE" 4831 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4832 modifier = "WITH QUERY EXPANSION" 4833 else: 4834 modifier = None 4835 4836 return self.expression( 4837 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4838 ) 4839 4840 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4841 def _parse_open_json(self) -> exp.OpenJSON: 4842 this = self._parse_bitwise() 4843 path = self._match(TokenType.COMMA) and self._parse_string() 4844 4845 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4846 this = self._parse_field(any_token=True) 4847 kind = self._parse_types() 4848 path = self._parse_string() 4849 as_json = self._match_pair(TokenType.ALIAS, 
TokenType.JSON) 4850 4851 return self.expression( 4852 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4853 ) 4854 4855 expressions = None 4856 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4857 self._match_l_paren() 4858 expressions = self._parse_csv(_parse_open_json_column_def) 4859 4860 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4861 4862 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4863 args = self._parse_csv(self._parse_bitwise) 4864 4865 if self._match(TokenType.IN): 4866 return self.expression( 4867 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4868 ) 4869 4870 if haystack_first: 4871 haystack = seq_get(args, 0) 4872 needle = seq_get(args, 1) 4873 else: 4874 needle = seq_get(args, 0) 4875 haystack = seq_get(args, 1) 4876 4877 return self.expression( 4878 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4879 ) 4880 4881 def _parse_predict(self) -> exp.Predict: 4882 self._match_text_seq("MODEL") 4883 this = self._parse_table() 4884 4885 self._match(TokenType.COMMA) 4886 self._match_text_seq("TABLE") 4887 4888 return self.expression( 4889 exp.Predict, 4890 this=this, 4891 expression=self._parse_table(), 4892 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4893 ) 4894 4895 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4896 args = self._parse_csv(self._parse_table) 4897 return exp.JoinHint(this=func_name.upper(), expressions=args) 4898 4899 def _parse_substring(self) -> exp.Substring: 4900 # Postgres supports the form: substring(string [from int] [for int]) 4901 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4902 4903 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4904 4905 if self._match(TokenType.FROM): 4906 args.append(self._parse_bitwise()) 4907 if self._match(TokenType.FOR): 4908 
    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM, the pattern precedes the target, so swap the operands.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause into its list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap *this* in IgnoreNulls/RespectNulls when the modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a HAVING MAX/MIN qualification attached to an aggregate."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # Anything other than MIN (including a missing keyword) means MAX.
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffixes of a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and OVER (...)."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the modifier so it wraps the aggregate, not its argument.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> -- a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) for *this*; when
        *explicit* is set, only an AS-introduced alias is accepted."""
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this
identifier 5121 5122 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5123 quoted = self._prev.token_type == TokenType.STRING 5124 return exp.Identifier(this=self._prev.text, quoted=quoted) 5125 5126 return None 5127 5128 def _parse_string(self) -> t.Optional[exp.Expression]: 5129 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5130 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5131 return self._parse_placeholder() 5132 5133 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5134 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5135 5136 def _parse_number(self) -> t.Optional[exp.Expression]: 5137 if self._match(TokenType.NUMBER): 5138 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5139 return self._parse_placeholder() 5140 5141 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5142 if self._match(TokenType.IDENTIFIER): 5143 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5144 return self._parse_placeholder() 5145 5146 def _parse_var( 5147 self, 5148 any_token: bool = False, 5149 tokens: t.Optional[t.Collection[TokenType]] = None, 5150 upper: bool = False, 5151 ) -> t.Optional[exp.Expression]: 5152 if ( 5153 (any_token and self._advance_any()) 5154 or self._match(TokenType.VAR) 5155 or (self._match_set(tokens) if tokens else False) 5156 ): 5157 return self.expression( 5158 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5159 ) 5160 return self._parse_placeholder() 5161 5162 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5163 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5164 self._advance() 5165 return self._prev 5166 return None 5167 5168 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5169 return self._parse_var() or self._parse_string() 5170 5171 def _parse_null(self) -> 
t.Optional[exp.Expression]: 5172 if self._match_set(self.NULL_TOKENS): 5173 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5174 return self._parse_placeholder() 5175 5176 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5177 if self._match(TokenType.TRUE): 5178 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5179 if self._match(TokenType.FALSE): 5180 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5181 return self._parse_placeholder() 5182 5183 def _parse_star(self) -> t.Optional[exp.Expression]: 5184 if self._match(TokenType.STAR): 5185 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5186 return self._parse_placeholder() 5187 5188 def _parse_parameter(self) -> exp.Parameter: 5189 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5190 return ( 5191 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5192 ) 5193 5194 self._match(TokenType.L_BRACE) 5195 this = _parse_parameter_part() 5196 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5197 self._match(TokenType.R_BRACE) 5198 5199 return self.expression(exp.Parameter, this=this, expression=expression) 5200 5201 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5202 if self._match_set(self.PLACEHOLDER_PARSERS): 5203 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5204 if placeholder: 5205 return placeholder 5206 self._advance(-1) 5207 return None 5208 5209 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5210 if not self._match(TokenType.EXCEPT): 5211 return None 5212 if self._match(TokenType.L_PAREN, advance=False): 5213 return self._parse_wrapped_csv(self._parse_column) 5214 5215 except_column = self._parse_column() 5216 return [except_column] if except_column else None 5217 5218 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5219 if not self._match(TokenType.REPLACE): 5220 return None 5221 if self._match(TokenType.L_PAREN, 
advance=False): 5222 return self._parse_wrapped_csv(self._parse_expression) 5223 5224 replace_expression = self._parse_expression() 5225 return [replace_expression] if replace_expression else None 5226 5227 def _parse_csv( 5228 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5229 ) -> t.List[exp.Expression]: 5230 parse_result = parse_method() 5231 items = [parse_result] if parse_result is not None else [] 5232 5233 while self._match(sep): 5234 self._add_comments(parse_result) 5235 parse_result = parse_method() 5236 if parse_result is not None: 5237 items.append(parse_result) 5238 5239 return items 5240 5241 def _parse_tokens( 5242 self, parse_method: t.Callable, expressions: t.Dict 5243 ) -> t.Optional[exp.Expression]: 5244 this = parse_method() 5245 5246 while self._match_set(expressions): 5247 this = self.expression( 5248 expressions[self._prev.token_type], 5249 this=this, 5250 comments=self._prev_comments, 5251 expression=parse_method(), 5252 ) 5253 5254 return this 5255 5256 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5257 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5258 5259 def _parse_wrapped_csv( 5260 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5261 ) -> t.List[exp.Expression]: 5262 return self._parse_wrapped( 5263 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5264 ) 5265 5266 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5267 wrapped = self._match(TokenType.L_PAREN) 5268 if not wrapped and not optional: 5269 self.raise_error("Expecting (") 5270 parse_result = parse_method() 5271 if wrapped: 5272 self._match_r_paren() 5273 return parse_result 5274 5275 def _parse_expressions(self) -> t.List[exp.Expression]: 5276 return self._parse_csv(self._parse_expression) 5277 5278 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5279 return 
self._parse_select() or self._parse_set_operations( 5280 self._parse_expression() if alias else self._parse_conjunction() 5281 ) 5282 5283 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5284 return self._parse_query_modifiers( 5285 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5286 ) 5287 5288 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5289 this = None 5290 if self._match_texts(self.TRANSACTION_KIND): 5291 this = self._prev.text 5292 5293 self._match_texts(("TRANSACTION", "WORK")) 5294 5295 modes = [] 5296 while True: 5297 mode = [] 5298 while self._match(TokenType.VAR): 5299 mode.append(self._prev.text) 5300 5301 if mode: 5302 modes.append(" ".join(mode)) 5303 if not self._match(TokenType.COMMA): 5304 break 5305 5306 return self.expression(exp.Transaction, this=this, modes=modes) 5307 5308 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5309 chain = None 5310 savepoint = None 5311 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5312 5313 self._match_texts(("TRANSACTION", "WORK")) 5314 5315 if self._match_text_seq("TO"): 5316 self._match_text_seq("SAVEPOINT") 5317 savepoint = self._parse_id_var() 5318 5319 if self._match(TokenType.AND): 5320 chain = not self._match_text_seq("NO") 5321 self._match_text_seq("CHAIN") 5322 5323 if is_rollback: 5324 return self.expression(exp.Rollback, savepoint=savepoint) 5325 5326 return self.expression(exp.Commit, chain=chain) 5327 5328 def _parse_refresh(self) -> exp.Refresh: 5329 self._match(TokenType.TABLE) 5330 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5331 5332 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5333 if not self._match_text_seq("ADD"): 5334 return None 5335 5336 self._match(TokenType.COLUMN) 5337 exists_column = self._parse_exists(not_=True) 5338 expression = self._parse_field_def() 5339 5340 if expression: 5341 expression.set("exists", exists_column) 5342 5343 
# https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5344 if self._match_texts(("FIRST", "AFTER")): 5345 position = self._prev.text 5346 column_position = self.expression( 5347 exp.ColumnPosition, this=self._parse_column(), position=position 5348 ) 5349 expression.set("position", column_position) 5350 5351 return expression 5352 5353 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5354 drop = self._match(TokenType.DROP) and self._parse_drop() 5355 if drop and not isinstance(drop, exp.Command): 5356 drop.set("kind", drop.args.get("kind", "COLUMN")) 5357 return drop 5358 5359 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5360 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5361 return self.expression( 5362 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5363 ) 5364 5365 def _parse_add_constraint(self) -> exp.AddConstraint: 5366 this = None 5367 kind = self._prev.token_type 5368 5369 if kind == TokenType.CONSTRAINT: 5370 this = self._parse_id_var() 5371 5372 if self._match_text_seq("CHECK"): 5373 expression = self._parse_wrapped(self._parse_conjunction) 5374 enforced = self._match_text_seq("ENFORCED") or False 5375 5376 return self.expression( 5377 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5378 ) 5379 5380 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5381 expression = self._parse_foreign_key() 5382 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5383 expression = self._parse_primary_key() 5384 else: 5385 expression = None 5386 5387 return self.expression(exp.AddConstraint, this=this, expression=expression) 5388 5389 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5390 index = self._index - 1 5391 5392 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5393 return self._parse_csv(self._parse_add_constraint) 5394 
5395 self._retreat(index) 5396 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5397 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5398 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5399 5400 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5401 self._match(TokenType.COLUMN) 5402 column = self._parse_field(any_token=True) 5403 5404 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5405 return self.expression(exp.AlterColumn, this=column, drop=True) 5406 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5407 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5408 if self._match(TokenType.COMMENT): 5409 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5410 5411 self._match_text_seq("SET", "DATA") 5412 return self.expression( 5413 exp.AlterColumn, 5414 this=column, 5415 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5416 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5417 using=self._match(TokenType.USING) and self._parse_conjunction(), 5418 ) 5419 5420 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5421 index = self._index - 1 5422 5423 partition_exists = self._parse_exists() 5424 if self._match(TokenType.PARTITION, advance=False): 5425 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5426 5427 self._retreat(index) 5428 return self._parse_csv(self._parse_drop_column) 5429 5430 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5431 if self._match(TokenType.COLUMN): 5432 exists = self._parse_exists() 5433 old_column = self._parse_column() 5434 to = self._match_text_seq("TO") 5435 new_column = self._parse_column() 5436 5437 if old_column is None or to is None or new_column is None: 5438 return None 5439 5440 return self.expression(exp.RenameColumn, this=old_column, to=new_column, 
exists=exists) 5441 5442 self._match_text_seq("TO") 5443 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5444 5445 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5446 start = self._prev 5447 5448 if not self._match(TokenType.TABLE): 5449 return self._parse_as_command(start) 5450 5451 exists = self._parse_exists() 5452 only = self._match_text_seq("ONLY") 5453 this = self._parse_table(schema=True) 5454 5455 if self._next: 5456 self._advance() 5457 5458 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5459 if parser: 5460 actions = ensure_list(parser(self)) 5461 5462 if not self._curr and actions: 5463 return self.expression( 5464 exp.AlterTable, 5465 this=this, 5466 exists=exists, 5467 actions=actions, 5468 only=only, 5469 ) 5470 5471 return self._parse_as_command(start) 5472 5473 def _parse_merge(self) -> exp.Merge: 5474 self._match(TokenType.INTO) 5475 target = self._parse_table() 5476 5477 if target and self._match(TokenType.ALIAS, advance=False): 5478 target.set("alias", self._parse_table_alias()) 5479 5480 self._match(TokenType.USING) 5481 using = self._parse_table() 5482 5483 self._match(TokenType.ON) 5484 on = self._parse_conjunction() 5485 5486 return self.expression( 5487 exp.Merge, 5488 this=target, 5489 using=using, 5490 on=on, 5491 expressions=self._parse_when_matched(), 5492 ) 5493 5494 def _parse_when_matched(self) -> t.List[exp.When]: 5495 whens = [] 5496 5497 while self._match(TokenType.WHEN): 5498 matched = not self._match(TokenType.NOT) 5499 self._match_text_seq("MATCHED") 5500 source = ( 5501 False 5502 if self._match_text_seq("BY", "TARGET") 5503 else self._match_text_seq("BY", "SOURCE") 5504 ) 5505 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5506 5507 self._match(TokenType.THEN) 5508 5509 if self._match(TokenType.INSERT): 5510 _this = self._parse_star() 5511 if _this: 5512 then: t.Optional[exp.Expression] = self.expression(exp.Insert, 
this=_this) 5513 else: 5514 then = self.expression( 5515 exp.Insert, 5516 this=self._parse_value(), 5517 expression=self._match(TokenType.VALUES) and self._parse_value(), 5518 ) 5519 elif self._match(TokenType.UPDATE): 5520 expressions = self._parse_star() 5521 if expressions: 5522 then = self.expression(exp.Update, expressions=expressions) 5523 else: 5524 then = self.expression( 5525 exp.Update, 5526 expressions=self._match(TokenType.SET) 5527 and self._parse_csv(self._parse_equality), 5528 ) 5529 elif self._match(TokenType.DELETE): 5530 then = self.expression(exp.Var, this=self._prev.text) 5531 else: 5532 then = None 5533 5534 whens.append( 5535 self.expression( 5536 exp.When, 5537 matched=matched, 5538 source=source, 5539 condition=condition, 5540 then=then, 5541 ) 5542 ) 5543 return whens 5544 5545 def _parse_show(self) -> t.Optional[exp.Expression]: 5546 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5547 if parser: 5548 return parser(self) 5549 return self._parse_as_command(self._prev) 5550 5551 def _parse_set_item_assignment( 5552 self, kind: t.Optional[str] = None 5553 ) -> t.Optional[exp.Expression]: 5554 index = self._index 5555 5556 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5557 return self._parse_set_transaction(global_=kind == "GLOBAL") 5558 5559 left = self._parse_primary() or self._parse_id_var() 5560 assignment_delimiter = self._match_texts(("=", "TO")) 5561 5562 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5563 self._retreat(index) 5564 return None 5565 5566 right = self._parse_statement() or self._parse_id_var() 5567 this = self.expression(exp.EQ, this=left, expression=right) 5568 5569 return self.expression(exp.SetItem, this=this, kind=kind) 5570 5571 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5572 self._match_text_seq("TRANSACTION") 5573 characteristics = self._parse_csv( 5574 lambda: 
self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5575 ) 5576 return self.expression( 5577 exp.SetItem, 5578 expressions=characteristics, 5579 kind="TRANSACTION", 5580 **{"global": global_}, # type: ignore 5581 ) 5582 5583 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5584 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5585 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5586 5587 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5588 index = self._index 5589 set_ = self.expression( 5590 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5591 ) 5592 5593 if self._curr: 5594 self._retreat(index) 5595 return self._parse_as_command(self._prev) 5596 5597 return set_ 5598 5599 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5600 for option in options: 5601 if self._match_text_seq(*option.split(" ")): 5602 return exp.var(option) 5603 return None 5604 5605 def _parse_as_command(self, start: Token) -> exp.Command: 5606 while self._curr: 5607 self._advance() 5608 text = self._find_sql(start, self._prev) 5609 size = len(start.text) 5610 self._warn_unsupported() 5611 return exp.Command(this=text[:size], expression=text[size:]) 5612 5613 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5614 settings = [] 5615 5616 self._match_l_paren() 5617 kind = self._parse_id_var() 5618 5619 if self._match(TokenType.L_PAREN): 5620 while True: 5621 key = self._parse_id_var() 5622 value = self._parse_primary() 5623 5624 if not key and value is None: 5625 break 5626 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5627 self._match(TokenType.R_PAREN) 5628 5629 self._match_r_paren() 5630 5631 return self.expression( 5632 exp.DictProperty, 5633 this=this, 5634 kind=kind.this if kind else None, 5635 settings=settings, 5636 ) 5637 5638 def _parse_dict_range(self, this: str) -> 
exp.DictRange: 5639 self._match_l_paren() 5640 has_min = self._match_text_seq("MIN") 5641 if has_min: 5642 min = self._parse_var() or self._parse_primary() 5643 self._match_text_seq("MAX") 5644 max = self._parse_var() or self._parse_primary() 5645 else: 5646 max = self._parse_var() or self._parse_primary() 5647 min = exp.Literal.number(0) 5648 self._match_r_paren() 5649 return self.expression(exp.DictRange, this=this, min=min, max=max) 5650 5651 def _parse_comprehension( 5652 self, this: t.Optional[exp.Expression] 5653 ) -> t.Optional[exp.Comprehension]: 5654 index = self._index 5655 expression = self._parse_column() 5656 if not self._match(TokenType.IN): 5657 self._retreat(index - 1) 5658 return None 5659 iterator = self._parse_column() 5660 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5661 return self.expression( 5662 exp.Comprehension, 5663 this=this, 5664 expression=expression, 5665 iterator=iterator, 5666 condition=condition, 5667 ) 5668 5669 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5670 if self._match(TokenType.HEREDOC_STRING): 5671 return self.expression(exp.Heredoc, this=self._prev.text) 5672 5673 if not self._match_text_seq("$"): 5674 return None 5675 5676 tags = ["$"] 5677 tag_text = None 5678 5679 if self._is_connected(): 5680 self._advance() 5681 tags.append(self._prev.text.upper()) 5682 else: 5683 self.raise_error("No closing $ found") 5684 5685 if tags[-1] != "$": 5686 if self._is_connected() and self._match_text_seq("$"): 5687 tag_text = tags[-1] 5688 tags.append("$") 5689 else: 5690 self.raise_error("No closing $ found") 5691 5692 heredoc_start = self._curr 5693 5694 while self._curr: 5695 if self._match_text_seq(*tags, advance=False): 5696 this = self._find_sql(heredoc_start, self._prev) 5697 self._advance(len(tags)) 5698 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5699 5700 self._advance() 5701 5702 self.raise_error(f"No closing {''.join(tags)} found") 5703 return None 5704 5705 def 
_find_parser( 5706 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5707 ) -> t.Optional[t.Callable]: 5708 if not self._curr: 5709 return None 5710 5711 index = self._index 5712 this = [] 5713 while True: 5714 # The current token might be multiple words 5715 curr = self._curr.text.upper() 5716 key = curr.split(" ") 5717 this.append(curr) 5718 5719 self._advance() 5720 result, trie = in_trie(trie, key) 5721 if result == TrieResult.FAILED: 5722 break 5723 5724 if result == TrieResult.EXISTS: 5725 subparser = parsers[" ".join(this)] 5726 return subparser 5727 5728 self._retreat(index) 5729 return None 5730 5731 def _match(self, token_type, advance=True, expression=None): 5732 if not self._curr: 5733 return None 5734 5735 if self._curr.token_type == token_type: 5736 if advance: 5737 self._advance() 5738 self._add_comments(expression) 5739 return True 5740 5741 return None 5742 5743 def _match_set(self, types, advance=True): 5744 if not self._curr: 5745 return None 5746 5747 if self._curr.token_type in types: 5748 if advance: 5749 self._advance() 5750 return True 5751 5752 return None 5753 5754 def _match_pair(self, token_type_a, token_type_b, advance=True): 5755 if not self._curr or not self._next: 5756 return None 5757 5758 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5759 if advance: 5760 self._advance(2) 5761 return True 5762 5763 return None 5764 5765 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5766 if not self._match(TokenType.L_PAREN, expression=expression): 5767 self.raise_error("Expecting (") 5768 5769 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5770 if not self._match(TokenType.R_PAREN, expression=expression): 5771 self.raise_error("Expecting )") 5772 5773 def _match_texts(self, texts, advance=True): 5774 if self._curr and self._curr.text.upper() in texts: 5775 if advance: 5776 self._advance() 5777 return True 5778 return None 5779 5780 def 
_match_text_seq(self, *texts, advance=True): 5781 index = self._index 5782 for text in texts: 5783 if self._curr and self._curr.text.upper() == text: 5784 self._advance() 5785 else: 5786 self._retreat(index) 5787 return None 5788 5789 if not advance: 5790 self._retreat(index) 5791 5792 return True 5793 5794 @t.overload 5795 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5796 ... 5797 5798 @t.overload 5799 def _replace_columns_with_dots( 5800 self, this: t.Optional[exp.Expression] 5801 ) -> t.Optional[exp.Expression]: 5802 ... 5803 5804 def _replace_columns_with_dots(self, this): 5805 if isinstance(this, exp.Dot): 5806 exp.replace_children(this, self._replace_columns_with_dots) 5807 elif isinstance(this, exp.Column): 5808 exp.replace_children(this, self._replace_columns_with_dots) 5809 table = this.args.get("table") 5810 this = ( 5811 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5812 ) 5813 5814 return this 5815 5816 def _replace_lambda( 5817 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5818 ) -> t.Optional[exp.Expression]: 5819 if not node: 5820 return node 5821 5822 for column in node.find_all(exp.Column): 5823 if column.parts[0].name in lambda_variables: 5824 dot_or_id = column.to_dot() if column.table else column.this 5825 parent = column.parent 5826 5827 while isinstance(parent, exp.Dot): 5828 if not isinstance(parent.parent, exp.Dot): 5829 parent.replace(dot_or_id) 5830 break 5831 parent = parent.parent 5832 else: 5833 if column is node: 5834 node = dot_or_id 5835 else: 5836 column.replace(dot_or_id) 5837 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1028 def __init__( 1029 self, 1030 error_level: t.Optional[ErrorLevel] = None, 1031 error_message_context: int = 100, 1032 max_errors: int = 3, 1033 dialect: DialectType = None, 1034 ): 1035 from sqlglot.dialects import Dialect 1036 1037 self.error_level = error_level or ErrorLevel.IMMEDIATE 1038 self.error_message_context = error_message_context 1039 self.max_errors = max_errors 1040 self.dialect = Dialect.get_or_raise(dialect) 1041 self.reset()
1053 def parse( 1054 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1055 ) -> t.List[t.Optional[exp.Expression]]: 1056 """ 1057 Parses a list of tokens and returns a list of syntax trees, one tree 1058 per parsed SQL statement. 1059 1060 Args: 1061 raw_tokens: The list of tokens. 1062 sql: The original SQL string, used to produce helpful debug messages. 1063 1064 Returns: 1065 The list of the produced syntax trees. 1066 """ 1067 return self._parse( 1068 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1069 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1071 def parse_into( 1072 self, 1073 expression_types: exp.IntoType, 1074 raw_tokens: t.List[Token], 1075 sql: t.Optional[str] = None, 1076 ) -> t.List[t.Optional[exp.Expression]]: 1077 """ 1078 Parses a list of tokens into a given Expression type. If a collection of Expression 1079 types is given instead, this method will try to parse the token list into each one 1080 of them, stopping at the first for which the parsing succeeds. 1081 1082 Args: 1083 expression_types: The expression type(s) to try and parse the token list into. 1084 raw_tokens: The list of tokens. 1085 sql: The original SQL string, used to produce helpful debug messages. 1086 1087 Returns: 1088 The target Expression. 1089 """ 1090 errors = [] 1091 for expression_type in ensure_list(expression_types): 1092 parser = self.EXPRESSION_PARSERS.get(expression_type) 1093 if not parser: 1094 raise TypeError(f"No parser registered for {expression_type}") 1095 1096 try: 1097 return self._parse(parser, raw_tokens, sql) 1098 except ParseError as e: 1099 e.errors[0]["into_expression"] = expression_type 1100 errors.append(e) 1101 1102 raise ParseError( 1103 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1104 errors=merge_errors(errors), 1105 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1142 def check_errors(self) -> None: 1143 """Logs or raises any found errors, depending on the chosen error level setting.""" 1144 if self.error_level == ErrorLevel.WARN: 1145 for error in self.errors: 1146 logger.error(str(error)) 1147 elif self.error_level == ErrorLevel.RAISE and self.errors: 1148 raise ParseError( 1149 concat_messages(self.errors, self.max_errors), 1150 errors=merge_errors(self.errors), 1151 )
Logs or raises any found errors, depending on the chosen error level setting.
1153 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1154 """ 1155 Appends an error in the list of recorded errors or raises it, depending on the chosen 1156 error level setting. 1157 """ 1158 token = token or self._curr or self._prev or Token.string("") 1159 start = token.start 1160 end = token.end + 1 1161 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1162 highlight = self.sql[start:end] 1163 end_context = self.sql[end : end + self.error_message_context] 1164 1165 error = ParseError.new( 1166 f"{message}. Line {token.line}, Col: {token.col}.\n" 1167 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1168 description=message, 1169 line=token.line, 1170 col=token.col, 1171 start_context=start_context, 1172 highlight=highlight, 1173 end_context=end_context, 1174 ) 1175 1176 if self.error_level == ErrorLevel.IMMEDIATE: 1177 raise error 1178 1179 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1181 def expression( 1182 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1183 ) -> E: 1184 """ 1185 Creates a new, validated Expression. 1186 1187 Args: 1188 exp_class: The expression class to instantiate. 1189 comments: An optional list of comments to attach to the expression. 1190 kwargs: The arguments to set for the expression along with their respective values. 1191 1192 Returns: 1193 The target expression. 1194 """ 1195 instance = exp_class(**kwargs) 1196 instance.add_comments(comments) if comments else self._add_comments(instance) 1197 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1204 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1205 """ 1206 Validates an Expression, making sure that all its mandatory arguments are set. 1207 1208 Args: 1209 expression: The expression to validate. 1210 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1211 1212 Returns: 1213 The validated expression. 1214 """ 1215 if self.error_level != ErrorLevel.IGNORE: 1216 for error_message in expression.error_messages(args): 1217 self.raise_error(error_message) 1218 1219 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.