sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_concat(args: t.List, dialect: Dialect) -> t.Optional[exp.Expression]: 51 if dialect.parser_class.CONCAT_NULL_OUTPUTS_STRING: 52 args = _ensure_string_if_null(args) 53 54 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 55 # we find such a call we replace it with its argument. 
56 if len(args) == 1: 57 return args[0] 58 59 return exp.Concat(expressions=args, safe=not dialect.STRICT_STRING_CONCAT) 60 61 62def parse_concat_ws(args: t.List, dialect: Dialect) -> t.Optional[exp.Expression]: 63 if len(args) < 2: 64 return exp.ConcatWs(expressions=args) 65 66 delim, *values = args 67 if dialect.parser_class.CONCAT_NULL_OUTPUTS_STRING: 68 values = _ensure_string_if_null(values) 69 70 return exp.ConcatWs(expressions=[delim] + values) 71 72 73def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 74 # Default argument order is base, expression 75 this = seq_get(args, 0) 76 expression = seq_get(args, 1) 77 78 if expression: 79 if not dialect.LOG_BASE_FIRST: 80 this, expression = expression, this 81 return exp.Log(this=this, expression=expression) 82 83 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 84 85 86class _Parser(type): 87 def __new__(cls, clsname, bases, attrs): 88 klass = super().__new__(cls, clsname, bases, attrs) 89 90 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 91 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 92 93 return klass 94 95 96class Parser(metaclass=_Parser): 97 """ 98 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 99 100 Args: 101 error_level: The desired error level. 102 Default: ErrorLevel.IMMEDIATE 103 error_message_context: Determines the amount of context to capture from a 104 query string when displaying the error message (in number of characters). 105 Default: 100 106 max_errors: Maximum number of error messages to include in a raised ParseError. 107 This is only relevant if error_level is ErrorLevel.RAISE. 
108 Default: 3 109 """ 110 111 FUNCTIONS: t.Dict[str, t.Callable] = { 112 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 113 "CONCAT": parse_concat, 114 "CONCAT_WS": parse_concat_ws, 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "LIKE": parse_like, 121 "LOG": parse_logarithm, 122 "TIME_TO_TIME_STR": lambda args: exp.Cast( 123 this=seq_get(args, 0), 124 to=exp.DataType(this=exp.DataType.Type.TEXT), 125 ), 126 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 127 this=exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 start=exp.Literal.number(1), 132 length=exp.Literal.number(10), 133 ), 134 "VAR_MAP": parse_var_map, 135 } 136 137 NO_PAREN_FUNCTIONS = { 138 TokenType.CURRENT_DATE: exp.CurrentDate, 139 TokenType.CURRENT_DATETIME: exp.CurrentDate, 140 TokenType.CURRENT_TIME: exp.CurrentTime, 141 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 142 TokenType.CURRENT_USER: exp.CurrentUser, 143 } 144 145 STRUCT_TYPE_TOKENS = { 146 TokenType.NESTED, 147 TokenType.STRUCT, 148 } 149 150 NESTED_TYPE_TOKENS = { 151 TokenType.ARRAY, 152 TokenType.LOWCARDINALITY, 153 TokenType.MAP, 154 TokenType.NULLABLE, 155 *STRUCT_TYPE_TOKENS, 156 } 157 158 ENUM_TYPE_TOKENS = { 159 TokenType.ENUM, 160 TokenType.ENUM8, 161 TokenType.ENUM16, 162 } 163 164 TYPE_TOKENS = { 165 TokenType.BIT, 166 TokenType.BOOLEAN, 167 TokenType.TINYINT, 168 TokenType.UTINYINT, 169 TokenType.SMALLINT, 170 TokenType.USMALLINT, 171 TokenType.INT, 172 TokenType.UINT, 173 TokenType.BIGINT, 174 TokenType.UBIGINT, 175 TokenType.INT128, 176 TokenType.UINT128, 177 TokenType.INT256, 178 TokenType.UINT256, 179 TokenType.MEDIUMINT, 180 TokenType.UMEDIUMINT, 181 TokenType.FIXEDSTRING, 182 TokenType.FLOAT, 183 TokenType.DOUBLE, 184 TokenType.CHAR, 185 TokenType.NCHAR, 186 
TokenType.VARCHAR, 187 TokenType.NVARCHAR, 188 TokenType.TEXT, 189 TokenType.MEDIUMTEXT, 190 TokenType.LONGTEXT, 191 TokenType.MEDIUMBLOB, 192 TokenType.LONGBLOB, 193 TokenType.BINARY, 194 TokenType.VARBINARY, 195 TokenType.JSON, 196 TokenType.JSONB, 197 TokenType.INTERVAL, 198 TokenType.TINYBLOB, 199 TokenType.TINYTEXT, 200 TokenType.TIME, 201 TokenType.TIMETZ, 202 TokenType.TIMESTAMP, 203 TokenType.TIMESTAMP_S, 204 TokenType.TIMESTAMP_MS, 205 TokenType.TIMESTAMP_NS, 206 TokenType.TIMESTAMPTZ, 207 TokenType.TIMESTAMPLTZ, 208 TokenType.DATETIME, 209 TokenType.DATETIME64, 210 TokenType.DATE, 211 TokenType.INT4RANGE, 212 TokenType.INT4MULTIRANGE, 213 TokenType.INT8RANGE, 214 TokenType.INT8MULTIRANGE, 215 TokenType.NUMRANGE, 216 TokenType.NUMMULTIRANGE, 217 TokenType.TSRANGE, 218 TokenType.TSMULTIRANGE, 219 TokenType.TSTZRANGE, 220 TokenType.TSTZMULTIRANGE, 221 TokenType.DATERANGE, 222 TokenType.DATEMULTIRANGE, 223 TokenType.DECIMAL, 224 TokenType.UDECIMAL, 225 TokenType.BIGDECIMAL, 226 TokenType.UUID, 227 TokenType.GEOGRAPHY, 228 TokenType.GEOMETRY, 229 TokenType.HLLSKETCH, 230 TokenType.HSTORE, 231 TokenType.PSEUDO_TYPE, 232 TokenType.SUPER, 233 TokenType.SERIAL, 234 TokenType.SMALLSERIAL, 235 TokenType.BIGSERIAL, 236 TokenType.XML, 237 TokenType.YEAR, 238 TokenType.UNIQUEIDENTIFIER, 239 TokenType.USERDEFINED, 240 TokenType.MONEY, 241 TokenType.SMALLMONEY, 242 TokenType.ROWVERSION, 243 TokenType.IMAGE, 244 TokenType.VARIANT, 245 TokenType.OBJECT, 246 TokenType.OBJECT_IDENTIFIER, 247 TokenType.INET, 248 TokenType.IPADDRESS, 249 TokenType.IPPREFIX, 250 TokenType.UNKNOWN, 251 TokenType.NULL, 252 *ENUM_TYPE_TOKENS, 253 *NESTED_TYPE_TOKENS, 254 } 255 256 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 257 TokenType.BIGINT: TokenType.UBIGINT, 258 TokenType.INT: TokenType.UINT, 259 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 260 TokenType.SMALLINT: TokenType.USMALLINT, 261 TokenType.TINYINT: TokenType.UTINYINT, 262 TokenType.DECIMAL: TokenType.UDECIMAL, 263 } 264 265 SUBQUERY_PREDICATES 
= { 266 TokenType.ANY: exp.Any, 267 TokenType.ALL: exp.All, 268 TokenType.EXISTS: exp.Exists, 269 TokenType.SOME: exp.Any, 270 } 271 272 RESERVED_TOKENS = { 273 *Tokenizer.SINGLE_TOKENS.values(), 274 TokenType.SELECT, 275 } 276 277 DB_CREATABLES = { 278 TokenType.DATABASE, 279 TokenType.SCHEMA, 280 TokenType.TABLE, 281 TokenType.VIEW, 282 TokenType.MODEL, 283 TokenType.DICTIONARY, 284 } 285 286 CREATABLES = { 287 TokenType.COLUMN, 288 TokenType.CONSTRAINT, 289 TokenType.FUNCTION, 290 TokenType.INDEX, 291 TokenType.PROCEDURE, 292 TokenType.FOREIGN_KEY, 293 *DB_CREATABLES, 294 } 295 296 # Tokens that can represent identifiers 297 ID_VAR_TOKENS = { 298 TokenType.VAR, 299 TokenType.ANTI, 300 TokenType.APPLY, 301 TokenType.ASC, 302 TokenType.AUTO_INCREMENT, 303 TokenType.BEGIN, 304 TokenType.CACHE, 305 TokenType.CASE, 306 TokenType.COLLATE, 307 TokenType.COMMAND, 308 TokenType.COMMENT, 309 TokenType.COMMIT, 310 TokenType.CONSTRAINT, 311 TokenType.DEFAULT, 312 TokenType.DELETE, 313 TokenType.DESC, 314 TokenType.DESCRIBE, 315 TokenType.DICTIONARY, 316 TokenType.DIV, 317 TokenType.END, 318 TokenType.EXECUTE, 319 TokenType.ESCAPE, 320 TokenType.FALSE, 321 TokenType.FIRST, 322 TokenType.FILTER, 323 TokenType.FORMAT, 324 TokenType.FULL, 325 TokenType.IS, 326 TokenType.ISNULL, 327 TokenType.INTERVAL, 328 TokenType.KEEP, 329 TokenType.KILL, 330 TokenType.LEFT, 331 TokenType.LOAD, 332 TokenType.MERGE, 333 TokenType.NATURAL, 334 TokenType.NEXT, 335 TokenType.OFFSET, 336 TokenType.OPERATOR, 337 TokenType.ORDINALITY, 338 TokenType.OVERLAPS, 339 TokenType.OVERWRITE, 340 TokenType.PARTITION, 341 TokenType.PERCENT, 342 TokenType.PIVOT, 343 TokenType.PRAGMA, 344 TokenType.RANGE, 345 TokenType.RECURSIVE, 346 TokenType.REFERENCES, 347 TokenType.REFRESH, 348 TokenType.REPLACE, 349 TokenType.RIGHT, 350 TokenType.ROW, 351 TokenType.ROWS, 352 TokenType.SEMI, 353 TokenType.SET, 354 TokenType.SETTINGS, 355 TokenType.SHOW, 356 TokenType.TEMPORARY, 357 TokenType.TOP, 358 TokenType.TRUE, 359 
TokenType.UNIQUE, 360 TokenType.UNPIVOT, 361 TokenType.UPDATE, 362 TokenType.USE, 363 TokenType.VOLATILE, 364 TokenType.WINDOW, 365 *CREATABLES, 366 *SUBQUERY_PREDICATES, 367 *TYPE_TOKENS, 368 *NO_PAREN_FUNCTIONS, 369 } 370 371 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 372 373 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 374 TokenType.ANTI, 375 TokenType.APPLY, 376 TokenType.ASOF, 377 TokenType.FULL, 378 TokenType.LEFT, 379 TokenType.LOCK, 380 TokenType.NATURAL, 381 TokenType.OFFSET, 382 TokenType.RIGHT, 383 TokenType.SEMI, 384 TokenType.WINDOW, 385 } 386 387 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 388 389 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 390 391 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 392 393 FUNC_TOKENS = { 394 TokenType.COLLATE, 395 TokenType.COMMAND, 396 TokenType.CURRENT_DATE, 397 TokenType.CURRENT_DATETIME, 398 TokenType.CURRENT_TIMESTAMP, 399 TokenType.CURRENT_TIME, 400 TokenType.CURRENT_USER, 401 TokenType.FILTER, 402 TokenType.FIRST, 403 TokenType.FORMAT, 404 TokenType.GLOB, 405 TokenType.IDENTIFIER, 406 TokenType.INDEX, 407 TokenType.ISNULL, 408 TokenType.ILIKE, 409 TokenType.INSERT, 410 TokenType.LIKE, 411 TokenType.MERGE, 412 TokenType.OFFSET, 413 TokenType.PRIMARY_KEY, 414 TokenType.RANGE, 415 TokenType.REPLACE, 416 TokenType.RLIKE, 417 TokenType.ROW, 418 TokenType.UNNEST, 419 TokenType.VAR, 420 TokenType.LEFT, 421 TokenType.RIGHT, 422 TokenType.DATE, 423 TokenType.DATETIME, 424 TokenType.TABLE, 425 TokenType.TIMESTAMP, 426 TokenType.TIMESTAMPTZ, 427 TokenType.WINDOW, 428 TokenType.XOR, 429 *TYPE_TOKENS, 430 *SUBQUERY_PREDICATES, 431 } 432 433 CONJUNCTION = { 434 TokenType.AND: exp.And, 435 TokenType.OR: exp.Or, 436 } 437 438 EQUALITY = { 439 TokenType.COLON_EQ: exp.PropertyEQ, 440 TokenType.EQ: exp.EQ, 441 TokenType.NEQ: exp.NEQ, 442 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 443 } 444 445 COMPARISON = { 446 TokenType.GT: exp.GT, 447 TokenType.GTE: exp.GTE, 448 TokenType.LT: exp.LT, 449 
TokenType.LTE: exp.LTE, 450 } 451 452 BITWISE = { 453 TokenType.AMP: exp.BitwiseAnd, 454 TokenType.CARET: exp.BitwiseXor, 455 TokenType.PIPE: exp.BitwiseOr, 456 } 457 458 TERM = { 459 TokenType.DASH: exp.Sub, 460 TokenType.PLUS: exp.Add, 461 TokenType.MOD: exp.Mod, 462 TokenType.COLLATE: exp.Collate, 463 } 464 465 FACTOR = { 466 TokenType.DIV: exp.IntDiv, 467 TokenType.LR_ARROW: exp.Distance, 468 TokenType.SLASH: exp.Div, 469 TokenType.STAR: exp.Mul, 470 } 471 472 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 473 474 TIMES = { 475 TokenType.TIME, 476 TokenType.TIMETZ, 477 } 478 479 TIMESTAMPS = { 480 TokenType.TIMESTAMP, 481 TokenType.TIMESTAMPTZ, 482 TokenType.TIMESTAMPLTZ, 483 *TIMES, 484 } 485 486 SET_OPERATIONS = { 487 TokenType.UNION, 488 TokenType.INTERSECT, 489 TokenType.EXCEPT, 490 } 491 492 JOIN_METHODS = { 493 TokenType.NATURAL, 494 TokenType.ASOF, 495 } 496 497 JOIN_SIDES = { 498 TokenType.LEFT, 499 TokenType.RIGHT, 500 TokenType.FULL, 501 } 502 503 JOIN_KINDS = { 504 TokenType.INNER, 505 TokenType.OUTER, 506 TokenType.CROSS, 507 TokenType.SEMI, 508 TokenType.ANTI, 509 } 510 511 JOIN_HINTS: t.Set[str] = set() 512 513 LAMBDAS = { 514 TokenType.ARROW: lambda self, expressions: self.expression( 515 exp.Lambda, 516 this=self._replace_lambda( 517 self._parse_conjunction(), 518 {node.name for node in expressions}, 519 ), 520 expressions=expressions, 521 ), 522 TokenType.FARROW: lambda self, expressions: self.expression( 523 exp.Kwarg, 524 this=exp.var(expressions[0].name), 525 expression=self._parse_conjunction(), 526 ), 527 } 528 529 COLUMN_OPERATORS = { 530 TokenType.DOT: None, 531 TokenType.DCOLON: lambda self, this, to: self.expression( 532 exp.Cast if self.STRICT_CAST else exp.TryCast, 533 this=this, 534 to=to, 535 ), 536 TokenType.ARROW: lambda self, this, path: self.expression( 537 exp.JSONExtract, 538 this=this, 539 expression=path, 540 ), 541 TokenType.DARROW: lambda self, this, path: self.expression( 542 exp.JSONExtractScalar, 543 
this=this, 544 expression=path, 545 ), 546 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 547 exp.JSONBExtract, 548 this=this, 549 expression=path, 550 ), 551 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 552 exp.JSONBExtractScalar, 553 this=this, 554 expression=path, 555 ), 556 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 557 exp.JSONBContains, 558 this=this, 559 expression=key, 560 ), 561 } 562 563 EXPRESSION_PARSERS = { 564 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 565 exp.Column: lambda self: self._parse_column(), 566 exp.Condition: lambda self: self._parse_conjunction(), 567 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 568 exp.Expression: lambda self: self._parse_statement(), 569 exp.From: lambda self: self._parse_from(), 570 exp.Group: lambda self: self._parse_group(), 571 exp.Having: lambda self: self._parse_having(), 572 exp.Identifier: lambda self: self._parse_id_var(), 573 exp.Join: lambda self: self._parse_join(), 574 exp.Lambda: lambda self: self._parse_lambda(), 575 exp.Lateral: lambda self: self._parse_lateral(), 576 exp.Limit: lambda self: self._parse_limit(), 577 exp.Offset: lambda self: self._parse_offset(), 578 exp.Order: lambda self: self._parse_order(), 579 exp.Ordered: lambda self: self._parse_ordered(), 580 exp.Properties: lambda self: self._parse_properties(), 581 exp.Qualify: lambda self: self._parse_qualify(), 582 exp.Returning: lambda self: self._parse_returning(), 583 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 584 exp.Table: lambda self: self._parse_table_parts(), 585 exp.TableAlias: lambda self: self._parse_table_alias(), 586 exp.Where: lambda self: self._parse_where(), 587 exp.Window: lambda self: self._parse_named_window(), 588 exp.With: lambda self: self._parse_with(), 589 "JOIN_TYPE": lambda self: self._parse_join_parts(), 590 } 591 592 STATEMENT_PARSERS = { 593 TokenType.ALTER: lambda self: 
self._parse_alter(), 594 TokenType.BEGIN: lambda self: self._parse_transaction(), 595 TokenType.CACHE: lambda self: self._parse_cache(), 596 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 597 TokenType.COMMENT: lambda self: self._parse_comment(), 598 TokenType.CREATE: lambda self: self._parse_create(), 599 TokenType.DELETE: lambda self: self._parse_delete(), 600 TokenType.DESC: lambda self: self._parse_describe(), 601 TokenType.DESCRIBE: lambda self: self._parse_describe(), 602 TokenType.DROP: lambda self: self._parse_drop(), 603 TokenType.INSERT: lambda self: self._parse_insert(), 604 TokenType.KILL: lambda self: self._parse_kill(), 605 TokenType.LOAD: lambda self: self._parse_load(), 606 TokenType.MERGE: lambda self: self._parse_merge(), 607 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 608 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 609 TokenType.REFRESH: lambda self: self._parse_refresh(), 610 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 611 TokenType.SET: lambda self: self._parse_set(), 612 TokenType.UNCACHE: lambda self: self._parse_uncache(), 613 TokenType.UPDATE: lambda self: self._parse_update(), 614 TokenType.USE: lambda self: self.expression( 615 exp.Use, 616 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 617 and exp.var(self._prev.text), 618 this=self._parse_table(schema=False), 619 ), 620 } 621 622 UNARY_PARSERS = { 623 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 624 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 625 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 626 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 627 } 628 629 PRIMARY_PARSERS = { 630 TokenType.STRING: lambda self, token: self.expression( 631 exp.Literal, this=token.text, is_string=True 632 ), 633 TokenType.NUMBER: 
lambda self, token: self.expression( 634 exp.Literal, this=token.text, is_string=False 635 ), 636 TokenType.STAR: lambda self, _: self.expression( 637 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 638 ), 639 TokenType.NULL: lambda self, _: self.expression(exp.Null), 640 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 641 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 642 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 643 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 644 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 645 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 646 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 647 exp.National, this=token.text 648 ), 649 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 650 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 651 exp.RawString, this=token.text 652 ), 653 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 654 } 655 656 PLACEHOLDER_PARSERS = { 657 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 658 TokenType.PARAMETER: lambda self: self._parse_parameter(), 659 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 660 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 661 else None, 662 } 663 664 RANGE_PARSERS = { 665 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 666 TokenType.GLOB: binary_range_parser(exp.Glob), 667 TokenType.ILIKE: binary_range_parser(exp.ILike), 668 TokenType.IN: lambda self, this: self._parse_in(this), 669 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 670 TokenType.IS: lambda self, this: self._parse_is(this), 671 TokenType.LIKE: 
binary_range_parser(exp.Like), 672 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 673 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 674 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 675 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 676 } 677 678 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 679 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 680 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 681 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 682 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 683 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 684 "CHECKSUM": lambda self: self._parse_checksum(), 685 "CLUSTER BY": lambda self: self._parse_cluster(), 686 "CLUSTERED": lambda self: self._parse_clustered_by(), 687 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 688 exp.CollateProperty, **kwargs 689 ), 690 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 691 "COPY": lambda self: self._parse_copy_property(), 692 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 693 "DEFINER": lambda self: self._parse_definer(), 694 "DETERMINISTIC": lambda self: self.expression( 695 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 696 ), 697 "DISTKEY": lambda self: self._parse_distkey(), 698 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 699 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 700 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 701 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 702 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 703 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 704 "FREESPACE": lambda self: self._parse_freespace(), 
705 "HEAP": lambda self: self.expression(exp.HeapProperty), 706 "IMMUTABLE": lambda self: self.expression( 707 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 708 ), 709 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 710 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 711 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 712 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 713 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 714 "LIKE": lambda self: self._parse_create_like(), 715 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 716 "LOCK": lambda self: self._parse_locking(), 717 "LOCKING": lambda self: self._parse_locking(), 718 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 719 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 720 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 721 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 722 "NO": lambda self: self._parse_no_property(), 723 "ON": lambda self: self._parse_on_property(), 724 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 725 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 726 "PARTITION": lambda self: self._parse_partitioned_of(), 727 "PARTITION BY": lambda self: self._parse_partitioned_by(), 728 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 729 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 730 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 731 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 732 "REMOTE": lambda self: self._parse_remote_with_connection(), 733 "RETURNS": lambda self: self._parse_returns(), 734 "ROW": lambda self: self._parse_row(), 735 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 
736 "SAMPLE": lambda self: self.expression( 737 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 738 ), 739 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 740 "SETTINGS": lambda self: self.expression( 741 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 742 ), 743 "SORTKEY": lambda self: self._parse_sortkey(), 744 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 745 "STABLE": lambda self: self.expression( 746 exp.StabilityProperty, this=exp.Literal.string("STABLE") 747 ), 748 "STORED": lambda self: self._parse_stored(), 749 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 750 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 751 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 752 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 753 "TO": lambda self: self._parse_to_table(), 754 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 755 "TRANSFORM": lambda self: self.expression( 756 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 757 ), 758 "TTL": lambda self: self._parse_ttl(), 759 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 760 "VOLATILE": lambda self: self._parse_volatile_property(), 761 "WITH": lambda self: self._parse_with_property(), 762 } 763 764 CONSTRAINT_PARSERS = { 765 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 766 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 767 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 768 "CHARACTER SET": lambda self: self.expression( 769 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 770 ), 771 "CHECK": lambda self: self.expression( 772 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 773 ), 774 "COLLATE": lambda self: self.expression( 775 
exp.CollateColumnConstraint, this=self._parse_var() 776 ), 777 "COMMENT": lambda self: self.expression( 778 exp.CommentColumnConstraint, this=self._parse_string() 779 ), 780 "COMPRESS": lambda self: self._parse_compress(), 781 "CLUSTERED": lambda self: self.expression( 782 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 783 ), 784 "NONCLUSTERED": lambda self: self.expression( 785 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 786 ), 787 "DEFAULT": lambda self: self.expression( 788 exp.DefaultColumnConstraint, this=self._parse_bitwise() 789 ), 790 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 791 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 792 "FORMAT": lambda self: self.expression( 793 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 794 ), 795 "GENERATED": lambda self: self._parse_generated_as_identity(), 796 "IDENTITY": lambda self: self._parse_auto_increment(), 797 "INLINE": lambda self: self._parse_inline(), 798 "LIKE": lambda self: self._parse_create_like(), 799 "NOT": lambda self: self._parse_not_constraint(), 800 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 801 "ON": lambda self: ( 802 self._match(TokenType.UPDATE) 803 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 804 ) 805 or self.expression(exp.OnProperty, this=self._parse_id_var()), 806 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 807 "PERIOD": lambda self: self._parse_period_for_system_time(), 808 "PRIMARY KEY": lambda self: self._parse_primary_key(), 809 "REFERENCES": lambda self: self._parse_references(match=False), 810 "TITLE": lambda self: self.expression( 811 exp.TitleColumnConstraint, this=self._parse_var_or_string() 812 ), 813 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 814 "UNIQUE": lambda 
self: self._parse_unique(), 815 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 816 "WITH": lambda self: self.expression( 817 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 818 ), 819 } 820 821 ALTER_PARSERS = { 822 "ADD": lambda self: self._parse_alter_table_add(), 823 "ALTER": lambda self: self._parse_alter_table_alter(), 824 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 825 "DROP": lambda self: self._parse_alter_table_drop(), 826 "RENAME": lambda self: self._parse_alter_table_rename(), 827 } 828 829 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 830 831 NO_PAREN_FUNCTION_PARSERS = { 832 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 833 "CASE": lambda self: self._parse_case(), 834 "IF": lambda self: self._parse_if(), 835 "NEXT": lambda self: self._parse_next_value_for(), 836 } 837 838 INVALID_FUNC_NAME_TOKENS = { 839 TokenType.IDENTIFIER, 840 TokenType.STRING, 841 } 842 843 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 844 845 FUNCTION_PARSERS = { 846 "ANY_VALUE": lambda self: self._parse_any_value(), 847 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 848 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 849 "DECODE": lambda self: self._parse_decode(), 850 "EXTRACT": lambda self: self._parse_extract(), 851 "JSON_OBJECT": lambda self: self._parse_json_object(), 852 "JSON_TABLE": lambda self: self._parse_json_table(), 853 "MATCH": lambda self: self._parse_match_against(), 854 "OPENJSON": lambda self: self._parse_open_json(), 855 "POSITION": lambda self: self._parse_position(), 856 "PREDICT": lambda self: self._parse_predict(), 857 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 858 "STRING_AGG": lambda self: self._parse_string_agg(), 859 "SUBSTRING": lambda self: self._parse_substring(), 860 "TRIM": lambda self: self._parse_trim(), 861 "TRY_CAST": lambda self: 
self._parse_cast(False, safe=True), 862 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 863 } 864 865 QUERY_MODIFIER_PARSERS = { 866 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 867 TokenType.WHERE: lambda self: ("where", self._parse_where()), 868 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 869 TokenType.HAVING: lambda self: ("having", self._parse_having()), 870 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 871 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 872 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 873 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 874 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 875 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 876 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 877 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 878 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 879 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 880 TokenType.CLUSTER_BY: lambda self: ( 881 "cluster", 882 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 883 ), 884 TokenType.DISTRIBUTE_BY: lambda self: ( 885 "distribute", 886 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 887 ), 888 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 889 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 890 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 891 } 892 893 SET_PARSERS = { 894 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 895 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 896 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 897 "TRANSACTION": lambda self: self._parse_set_transaction(), 898 } 

    # SHOW statement parsers; empty here, populated by dialect subclasses.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Parsers applied to literals that follow a type keyword, keyed by data type.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement (CREATE ... AS <select>).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may directly precede VOLATILE in a CREATE statement; see
    # _parse_volatile_property for how these disambiguate the keyword.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Valid keywords after INSERT OR ... (e.g. SQLite's INSERT OR REPLACE).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # Tokens usable as a window alias; ROWS is excluded since it starts a frame spec.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        # Imported lazily to avoid a circular import between parser and dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all per-parse state so the instance can be reused for a new input."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the expression type that failed, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # All candidate types failed; surface every attempt's errors at once.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    # Core driver: splits the token stream on semicolons into per-statement chunks,
    # then applies parse_method to each chunk.
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start at -1 so the first _advance() lands on index 0.
            self._index = -1
            self._tokens =
tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed by the parser.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m pair underlines the offending span in ANSI terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no explicit comments were given, pick up any pending comments from
        # the previously consumed token (and clear them).
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    # Attaches (and clears) any comments buffered from the last consumed token.
    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    # Returns the slice of the original SQL covered by the given start/end tokens.
    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    # Moves the cursor forward `times` tokens, refreshing _curr/_next/_prev and
    # buffering the previous token's comments for later attachment.
    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    # Rewinds (or fast-forwards) the cursor to an absolute index; used for backtracking.
    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    # Fallback: wraps the previous keyword plus an optional trailing string into
    # an opaque Command node.
    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    # Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>.
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: keep the statement as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    # Parses a (possibly qualified) table name into a ToTableProperty.
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates =
self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    # Top-level statement dispatcher: tries the registered statement parsers, then
    # raw commands, and finally a bare expression / SELECT with query modifiers.
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <table> [CASCADE | ...].
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: fall back to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    # Matches IF [NOT] EXISTS; returns a truthy value only if the full sequence matched.
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    # Parses the many shapes of CREATE [OR REPLACE] [UNIQUE] <kind> ... statements.
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION becomes the kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        # Properties can appear at several locations; accumulate them all into one node.
        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake-style CLONE/COPY clause, optionally with AT/BEFORE time travel.
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect keyword modifiers that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass through the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers for this property.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    # Parses a single table/DDL property, trying registered parsers first and
    # falling back to a generic key = value form.
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic fallback: <column> = <value>; backtrack if no "=" follows.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    # Parses STORED [AS] <format> | INPUTFORMAT ... OUTPUTFORMAT ... (Hive-style).
    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    # Generic "<KEYWORD> [=|AS] <field>" property parser, wrapping the field in exp_class.
    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    # Greedily parses consecutive properties into a single exp.Properties node.
    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    # Teradata FALLBACK [PROTECTION] property.
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    # Disambiguates VOLATILE: after CREATE/REPLACE/UNIQUE it's a table property,
    # otherwise it's a function stability marker.
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    # T-SQL SYSTEM_VERSIONING = ON [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)].
    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    # Dispatches the various WITH <...> property forms (wrapped list, JOURNAL,
    # [NO] DATA, isolated loading).
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    # Teradata WITH JOURNAL TABLE = <table>.
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    # CHECKSUM = ON | OFF | DEFAULT; `on` stays None when neither ON nor OFF matched.
    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    # Hive CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS.
    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    # COPY GRANTS; backtracks if GRANTS doesn't follow (COPY meant something else).
    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    # MERGEBLOCKRATIO [= n [PERCENT]] | NO/DEFAULT MERGEBLOCKRATIO (Teradata).
    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    # DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]] (Teradata).
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    # BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)] (Teradata).
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE] (Teradata).
    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
1745 ) 1746 1747 def _parse_locking(self) -> exp.LockingProperty: 1748 if self._match(TokenType.TABLE): 1749 kind = "TABLE" 1750 elif self._match(TokenType.VIEW): 1751 kind = "VIEW" 1752 elif self._match(TokenType.ROW): 1753 kind = "ROW" 1754 elif self._match_text_seq("DATABASE"): 1755 kind = "DATABASE" 1756 else: 1757 kind = None 1758 1759 if kind in ("DATABASE", "TABLE", "VIEW"): 1760 this = self._parse_table_parts() 1761 else: 1762 this = None 1763 1764 if self._match(TokenType.FOR): 1765 for_or_in = "FOR" 1766 elif self._match(TokenType.IN): 1767 for_or_in = "IN" 1768 else: 1769 for_or_in = None 1770 1771 if self._match_text_seq("ACCESS"): 1772 lock_type = "ACCESS" 1773 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1774 lock_type = "EXCLUSIVE" 1775 elif self._match_text_seq("SHARE"): 1776 lock_type = "SHARE" 1777 elif self._match_text_seq("READ"): 1778 lock_type = "READ" 1779 elif self._match_text_seq("WRITE"): 1780 lock_type = "WRITE" 1781 elif self._match_text_seq("CHECKSUM"): 1782 lock_type = "CHECKSUM" 1783 else: 1784 lock_type = None 1785 1786 override = self._match_text_seq("OVERRIDE") 1787 1788 return self.expression( 1789 exp.LockingProperty, 1790 this=this, 1791 kind=kind, 1792 for_or_in=for_or_in, 1793 lock_type=lock_type, 1794 override=override, 1795 ) 1796 1797 def _parse_partition_by(self) -> t.List[exp.Expression]: 1798 if self._match(TokenType.PARTITION_BY): 1799 return self._parse_csv(self._parse_conjunction) 1800 return [] 1801 1802 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1803 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1804 if self._match_text_seq("MINVALUE"): 1805 return exp.var("MINVALUE") 1806 if self._match_text_seq("MAXVALUE"): 1807 return exp.var("MAXVALUE") 1808 return self._parse_bitwise() 1809 1810 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1811 expression = None 1812 from_expressions = None 1813 to_expressions = None 1814 1815 if self._match(TokenType.IN): 1816 this 
= self._parse_wrapped_csv(self._parse_bitwise) 1817 elif self._match(TokenType.FROM): 1818 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1819 self._match_text_seq("TO") 1820 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1821 elif self._match_text_seq("WITH", "(", "MODULUS"): 1822 this = self._parse_number() 1823 self._match_text_seq(",", "REMAINDER") 1824 expression = self._parse_number() 1825 self._match_r_paren() 1826 else: 1827 self.raise_error("Failed to parse partition bound spec.") 1828 1829 return self.expression( 1830 exp.PartitionBoundSpec, 1831 this=this, 1832 expression=expression, 1833 from_expressions=from_expressions, 1834 to_expressions=to_expressions, 1835 ) 1836 1837 # https://www.postgresql.org/docs/current/sql-createtable.html 1838 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1839 if not self._match_text_seq("OF"): 1840 self._retreat(self._index - 1) 1841 return None 1842 1843 this = self._parse_table(schema=True) 1844 1845 if self._match(TokenType.DEFAULT): 1846 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1847 elif self._match_text_seq("FOR", "VALUES"): 1848 expression = self._parse_partition_bound_spec() 1849 else: 1850 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1851 1852 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1853 1854 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1855 self._match(TokenType.EQ) 1856 return self.expression( 1857 exp.PartitionedByProperty, 1858 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1859 ) 1860 1861 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1862 if self._match_text_seq("AND", "STATISTICS"): 1863 statistics = True 1864 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1865 statistics = False 1866 else: 1867 statistics = None 1868 1869 return self.expression(exp.WithDataProperty, no=no, 
statistics=statistics) 1870 1871 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1872 if self._match_text_seq("PRIMARY", "INDEX"): 1873 return exp.NoPrimaryIndexProperty() 1874 return None 1875 1876 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1877 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1878 return exp.OnCommitProperty() 1879 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1880 return exp.OnCommitProperty(delete=True) 1881 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1882 1883 def _parse_distkey(self) -> exp.DistKeyProperty: 1884 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1885 1886 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1887 table = self._parse_table(schema=True) 1888 1889 options = [] 1890 while self._match_texts(("INCLUDING", "EXCLUDING")): 1891 this = self._prev.text.upper() 1892 1893 id_var = self._parse_id_var() 1894 if not id_var: 1895 return None 1896 1897 options.append( 1898 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1899 ) 1900 1901 return self.expression(exp.LikeProperty, this=table, expressions=options) 1902 1903 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1904 return self.expression( 1905 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1906 ) 1907 1908 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1909 self._match(TokenType.EQ) 1910 return self.expression( 1911 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1912 ) 1913 1914 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1915 self._match_text_seq("WITH", "CONNECTION") 1916 return self.expression( 1917 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1918 ) 1919 1920 def _parse_returns(self) -> exp.ReturnsProperty: 1921 value: 
t.Optional[exp.Expression] 1922 is_table = self._match(TokenType.TABLE) 1923 1924 if is_table: 1925 if self._match(TokenType.LT): 1926 value = self.expression( 1927 exp.Schema, 1928 this="TABLE", 1929 expressions=self._parse_csv(self._parse_struct_types), 1930 ) 1931 if not self._match(TokenType.GT): 1932 self.raise_error("Expecting >") 1933 else: 1934 value = self._parse_schema(exp.var("TABLE")) 1935 else: 1936 value = self._parse_types() 1937 1938 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1939 1940 def _parse_describe(self) -> exp.Describe: 1941 kind = self._match_set(self.CREATABLES) and self._prev.text 1942 this = self._parse_table(schema=True) 1943 properties = self._parse_properties() 1944 expressions = properties.expressions if properties else None 1945 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1946 1947 def _parse_insert(self) -> exp.Insert: 1948 comments = ensure_list(self._prev_comments) 1949 overwrite = self._match(TokenType.OVERWRITE) 1950 ignore = self._match(TokenType.IGNORE) 1951 local = self._match_text_seq("LOCAL") 1952 alternative = None 1953 1954 if self._match_text_seq("DIRECTORY"): 1955 this: t.Optional[exp.Expression] = self.expression( 1956 exp.Directory, 1957 this=self._parse_var_or_string(), 1958 local=local, 1959 row_format=self._parse_row_format(match_row=True), 1960 ) 1961 else: 1962 if self._match(TokenType.OR): 1963 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1964 1965 self._match(TokenType.INTO) 1966 comments += ensure_list(self._prev_comments) 1967 self._match(TokenType.TABLE) 1968 this = self._parse_table(schema=True) 1969 1970 returning = self._parse_returning() 1971 1972 return self.expression( 1973 exp.Insert, 1974 comments=comments, 1975 this=this, 1976 by_name=self._match_text_seq("BY", "NAME"), 1977 exists=self._parse_exists(), 1978 partition=self._parse_partition(), 1979 where=self._match_pair(TokenType.REPLACE, 
TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement: an optional CONNECTION/QUERY kind, then an id expression."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse an INSERT conflict clause.

        Handles both the Postgres-style ``ON CONFLICT`` and the MySQL-style
        ``ON DUPLICATE KEY`` spellings; returns None when neither is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # ON CONFLICT targets either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        # Action: DO NOTHING, or [DO] UPDATE SET <assignments>.
        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause, optionally followed by ``INTO <table part>``."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of a ROW FORMAT clause (the leading ROW token
        is expected to have been consumed already, since only FORMAT is matched here)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self,
match_row: bool = False 2049 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2050 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2051 return None 2052 2053 if self._match_text_seq("SERDE"): 2054 this = self._parse_string() 2055 2056 serde_properties = None 2057 if self._match(TokenType.SERDE_PROPERTIES): 2058 serde_properties = self.expression( 2059 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2060 ) 2061 2062 return self.expression( 2063 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2064 ) 2065 2066 self._match_text_seq("DELIMITED") 2067 2068 kwargs = {} 2069 2070 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2071 kwargs["fields"] = self._parse_string() 2072 if self._match_text_seq("ESCAPED", "BY"): 2073 kwargs["escaped"] = self._parse_string() 2074 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2075 kwargs["collection_items"] = self._parse_string() 2076 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2077 kwargs["map_keys"] = self._parse_string() 2078 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2079 kwargs["lines"] = self._parse_string() 2080 if self._match_text_seq("NULL", "DEFINED", "AS"): 2081 kwargs["null"] = self._parse_string() 2082 2083 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2084 2085 def _parse_load(self) -> exp.LoadData | exp.Command: 2086 if self._match_text_seq("DATA"): 2087 local = self._match_text_seq("LOCAL") 2088 self._match_text_seq("INPATH") 2089 inpath = self._parse_string() 2090 overwrite = self._match(TokenType.OVERWRITE) 2091 self._match_pair(TokenType.INTO, TokenType.TABLE) 2092 2093 return self.expression( 2094 exp.LoadData, 2095 this=self._parse_table(schema=True), 2096 local=local, 2097 overwrite=overwrite, 2098 inpath=inpath, 2099 partition=self._parse_partition(), 2100 input_format=self._match_text_seq("INPUTFORMAT") and 
self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        # A LOAD that isn't LOAD DATA is kept verbatim as an opaque command.
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (tables / FROM / USING / WHERE / RETURNING / LIMIT)."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING is probed both here and again after WHERE below.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement into an exp.Update node."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            # A dict literal is used because "from" is a Python keyword.
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse ``UNCACHE TABLE [IF EXISTS] <table>``; TABLE is mandatory."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse ``CACHE [LAZY] TABLE <table> [OPTIONS(...)] [AS <select>]``."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options
= [] 2159 if self._match_text_seq("OPTIONS"): 2160 self._match_l_paren() 2161 k = self._parse_string() 2162 self._match(TokenType.EQ) 2163 v = self._parse_string() 2164 options = [k, v] 2165 self._match_r_paren() 2166 2167 self._match(TokenType.ALIAS) 2168 return self.expression( 2169 exp.Cache, 2170 this=table, 2171 lazy=lazy, 2172 options=options, 2173 expression=self._parse_select(nested=True), 2174 ) 2175 2176 def _parse_partition(self) -> t.Optional[exp.Partition]: 2177 if not self._match(TokenType.PARTITION): 2178 return None 2179 2180 return self.expression( 2181 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2182 ) 2183 2184 def _parse_value(self) -> exp.Tuple: 2185 if self._match(TokenType.L_PAREN): 2186 expressions = self._parse_csv(self._parse_conjunction) 2187 self._match_r_paren() 2188 return self.expression(exp.Tuple, expressions=expressions) 2189 2190 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2191 # https://prestodb.io/docs/current/sql/values.html 2192 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2193 2194 def _parse_projections(self) -> t.List[exp.Expression]: 2195 return self._parse_expressions() 2196 2197 def _parse_select( 2198 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2199 ) -> t.Optional[exp.Expression]: 2200 cte = self._parse_with() 2201 2202 if cte: 2203 this = self._parse_statement() 2204 2205 if not this: 2206 self.raise_error("Failed to parse any statement following CTE") 2207 return cte 2208 2209 if "with" in this.arg_types: 2210 this.set("with", cte) 2211 else: 2212 self.raise_error(f"{this.key} does not support CTE") 2213 this = cte 2214 2215 return this 2216 2217 # duckdb supports leading with FROM x 2218 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2219 2220 if self._match(TokenType.SELECT): 2221 comments = self._prev_comments 2222 2223 hint = self._parse_hint() 2224 
all_ = self._match(TokenType.ALL) 2225 distinct = self._match_set(self.DISTINCT_TOKENS) 2226 2227 kind = ( 2228 self._match(TokenType.ALIAS) 2229 and self._match_texts(("STRUCT", "VALUE")) 2230 and self._prev.text 2231 ) 2232 2233 if distinct: 2234 distinct = self.expression( 2235 exp.Distinct, 2236 on=self._parse_value() if self._match(TokenType.ON) else None, 2237 ) 2238 2239 if all_ and distinct: 2240 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2241 2242 limit = self._parse_limit(top=True) 2243 projections = self._parse_projections() 2244 2245 this = self.expression( 2246 exp.Select, 2247 kind=kind, 2248 hint=hint, 2249 distinct=distinct, 2250 expressions=projections, 2251 limit=limit, 2252 ) 2253 this.comments = comments 2254 2255 into = self._parse_into() 2256 if into: 2257 this.set("into", into) 2258 2259 if not from_: 2260 from_ = self._parse_from() 2261 2262 if from_: 2263 this.set("from", from_) 2264 2265 this = self._parse_query_modifiers(this) 2266 elif (table or nested) and self._match(TokenType.L_PAREN): 2267 if self._match(TokenType.PIVOT): 2268 this = self._parse_simplified_pivot() 2269 elif self._match(TokenType.FROM): 2270 this = exp.select("*").from_( 2271 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2272 ) 2273 else: 2274 this = self._parse_table() if table else self._parse_select(nested=True) 2275 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2276 2277 self._match_r_paren() 2278 2279 # We return early here so that the UNION isn't attached to the subquery by the 2280 # following call to _parse_set_operations, but instead becomes the parent node 2281 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2282 elif self._match(TokenType.VALUES): 2283 this = self.expression( 2284 exp.Values, 2285 expressions=self._parse_csv(self._parse_value), 2286 alias=self._parse_table_alias(), 2287 ) 2288 elif from_: 2289 this = exp.select("*").from_(from_.this, copy=False) 2290 else: 2291 
this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause into an exp.With, or return None if absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Keep consuming CTEs while they are separated by a comma; a stray
            # WITH between CTEs is tolerated and consumed as well.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: ``<alias> AS (<statement>)``. The alias is mandatory."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an (optionally AS-prefixed) table alias with an optional column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Only commit to the parenthesized column list if it parsed; otherwise rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap *this* in an exp.Subquery, attaching any pivots and (optionally) an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
alias=self._parse_table_alias() if parse_alias else None, 2357 ) 2358 2359 def _parse_query_modifiers( 2360 self, this: t.Optional[exp.Expression] 2361 ) -> t.Optional[exp.Expression]: 2362 if isinstance(this, self.MODIFIABLES): 2363 for join in iter(self._parse_join, None): 2364 this.append("joins", join) 2365 for lateral in iter(self._parse_lateral, None): 2366 this.append("laterals", lateral) 2367 2368 while True: 2369 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2370 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2371 key, expression = parser(self) 2372 2373 if expression: 2374 this.set(key, expression) 2375 if key == "limit": 2376 offset = expression.args.pop("offset", None) 2377 if offset: 2378 this.set("offset", exp.Offset(expression=offset)) 2379 continue 2380 break 2381 return this 2382 2383 def _parse_hint(self) -> t.Optional[exp.Hint]: 2384 if self._match(TokenType.HINT): 2385 hints = [] 2386 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2387 hints.extend(hint) 2388 2389 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2390 self.raise_error("Expected */ after HINT") 2391 2392 return self.expression(exp.Hint, expressions=hints) 2393 2394 return None 2395 2396 def _parse_into(self) -> t.Optional[exp.Into]: 2397 if not self._match(TokenType.INTO): 2398 return None 2399 2400 temp = self._match(TokenType.TEMPORARY) 2401 unlogged = self._match_text_seq("UNLOGGED") 2402 self._match(TokenType.TABLE) 2403 2404 return self.expression( 2405 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2406 ) 2407 2408 def _parse_from( 2409 self, joins: bool = False, skip_from_token: bool = False 2410 ) -> t.Optional[exp.From]: 2411 if not skip_from_token and not self._match(TokenType.FROM): 2412 return None 2413 2414 return self.expression( 2415 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2416 ) 2417 2418 def _parse_match_recognize(self) -> 
t.Optional[exp.MatchRecognize]: 2419 if not self._match(TokenType.MATCH_RECOGNIZE): 2420 return None 2421 2422 self._match_l_paren() 2423 2424 partition = self._parse_partition_by() 2425 order = self._parse_order() 2426 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2427 2428 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2429 rows = exp.var("ONE ROW PER MATCH") 2430 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2431 text = "ALL ROWS PER MATCH" 2432 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2433 text += f" SHOW EMPTY MATCHES" 2434 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2435 text += f" OMIT EMPTY MATCHES" 2436 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2437 text += f" WITH UNMATCHED ROWS" 2438 rows = exp.var(text) 2439 else: 2440 rows = None 2441 2442 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2443 text = "AFTER MATCH SKIP" 2444 if self._match_text_seq("PAST", "LAST", "ROW"): 2445 text += f" PAST LAST ROW" 2446 elif self._match_text_seq("TO", "NEXT", "ROW"): 2447 text += f" TO NEXT ROW" 2448 elif self._match_text_seq("TO", "FIRST"): 2449 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2450 elif self._match_text_seq("TO", "LAST"): 2451 text += f" TO LAST {self._advance_any().text}" # type: ignore 2452 after = exp.var(text) 2453 else: 2454 after = None 2455 2456 if self._match_text_seq("PATTERN"): 2457 self._match_l_paren() 2458 2459 if not self._curr: 2460 self.raise_error("Expecting )", self._curr) 2461 2462 paren = 1 2463 start = self._curr 2464 2465 while self._curr and paren > 0: 2466 if self._curr.token_type == TokenType.L_PAREN: 2467 paren += 1 2468 if self._curr.token_type == TokenType.R_PAREN: 2469 paren -= 1 2470 2471 end = self._prev 2472 self._advance() 2473 2474 if paren > 0: 2475 self.raise_error("Expecting )", self._curr) 2476 2477 pattern = exp.var(self._find_sql(start, end)) 2478 else: 2479 pattern = None 2480 2481 define = ( 2482 
self._parse_csv( 2483 lambda: self.expression( 2484 exp.Alias, 2485 alias=self._parse_id_var(any_token=True), 2486 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2487 ) 2488 ) 2489 if self._match_text_seq("DEFINE") 2490 else None 2491 ) 2492 2493 self._match_r_paren() 2494 2495 return self.expression( 2496 exp.MatchRecognize, 2497 partition_by=partition, 2498 order=order, 2499 measures=measures, 2500 rows=rows, 2501 after=after, 2502 pattern=pattern, 2503 define=define, 2504 alias=self._parse_table_alias(), 2505 ) 2506 2507 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2508 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2509 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2510 2511 if outer_apply or cross_apply: 2512 this = self._parse_select(table=True) 2513 view = None 2514 outer = not cross_apply 2515 elif self._match(TokenType.LATERAL): 2516 this = self._parse_select(table=True) 2517 view = self._match(TokenType.VIEW) 2518 outer = self._match(TokenType.OUTER) 2519 else: 2520 return None 2521 2522 if not this: 2523 this = ( 2524 self._parse_unnest() 2525 or self._parse_function() 2526 or self._parse_id_var(any_token=False) 2527 ) 2528 2529 while self._match(TokenType.DOT): 2530 this = exp.Dot( 2531 this=this, 2532 expression=self._parse_function() or self._parse_id_var(any_token=False), 2533 ) 2534 2535 if view: 2536 table = self._parse_id_var(any_token=False) 2537 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2538 table_alias: t.Optional[exp.TableAlias] = self.expression( 2539 exp.TableAlias, this=table, columns=columns 2540 ) 2541 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2542 # We move the alias from the lateral's child node to the lateral itself 2543 table_alias = this.args["alias"].pop() 2544 else: 2545 table_alias = self._parse_table_alias() 2546 2547 return self.expression(exp.Lateral, this=this, view=view, outer=outer, 
alias=table_alias) 2548 2549 def _parse_join_parts( 2550 self, 2551 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2552 return ( 2553 self._match_set(self.JOIN_METHODS) and self._prev, 2554 self._match_set(self.JOIN_SIDES) and self._prev, 2555 self._match_set(self.JOIN_KINDS) and self._prev, 2556 ) 2557 2558 def _parse_join( 2559 self, skip_join_token: bool = False, parse_bracket: bool = False 2560 ) -> t.Optional[exp.Join]: 2561 if self._match(TokenType.COMMA): 2562 return self.expression(exp.Join, this=self._parse_table()) 2563 2564 index = self._index 2565 method, side, kind = self._parse_join_parts() 2566 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2567 join = self._match(TokenType.JOIN) 2568 2569 if not skip_join_token and not join: 2570 self._retreat(index) 2571 kind = None 2572 method = None 2573 side = None 2574 2575 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2576 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2577 2578 if not skip_join_token and not join and not outer_apply and not cross_apply: 2579 return None 2580 2581 if outer_apply: 2582 side = Token(TokenType.LEFT, "LEFT") 2583 2584 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2585 2586 if method: 2587 kwargs["method"] = method.text 2588 if side: 2589 kwargs["side"] = side.text 2590 if kind: 2591 kwargs["kind"] = kind.text 2592 if hint: 2593 kwargs["hint"] = hint 2594 2595 if self._match(TokenType.ON): 2596 kwargs["on"] = self._parse_conjunction() 2597 elif self._match(TokenType.USING): 2598 kwargs["using"] = self._parse_wrapped_id_vars() 2599 elif not (kind and kind.token_type == TokenType.CROSS): 2600 index = self._index 2601 join = self._parse_join() 2602 2603 if join and self._match(TokenType.ON): 2604 kwargs["on"] = self._parse_conjunction() 2605 elif join and self._match(TokenType.USING): 2606 kwargs["using"] = self._parse_wrapped_id_vars() 2607 else: 
join = None
                    self._retreat(index)

        kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an index column expression with an optional operator-class suffix."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When *index* is given, only the trailing ``ON <table>`` part is parsed;
        otherwise the leading ``[UNIQUE|PRIMARY|AMP] INDEX <name>`` prefix is
        parsed first (returning None if the INDEX keyword is missing).
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL ``WITH (...)`` or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            #
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2673 hints.append( 2674 self.expression( 2675 exp.WithTableHint, 2676 expressions=self._parse_csv( 2677 lambda: self._parse_function() or self._parse_var(any_token=True) 2678 ), 2679 ) 2680 ) 2681 self._match_r_paren() 2682 else: 2683 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2684 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2685 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2686 2687 self._match_texts(("INDEX", "KEY")) 2688 if self._match(TokenType.FOR): 2689 hint.set("target", self._advance_any() and self._prev.text.upper()) 2690 2691 hint.set("expressions", self._parse_wrapped_id_vars()) 2692 hints.append(hint) 2693 2694 return hints or None 2695 2696 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2697 return ( 2698 (not schema and self._parse_function(optional_parens=False)) 2699 or self._parse_id_var(any_token=False) 2700 or self._parse_string_as_identifier() 2701 or self._parse_placeholder() 2702 ) 2703 2704 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2705 catalog = None 2706 db = None 2707 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2708 2709 while self._match(TokenType.DOT): 2710 if catalog: 2711 # This allows nesting the table in arbitrarily many dot expressions if needed 2712 table = self.expression( 2713 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2714 ) 2715 else: 2716 catalog = db 2717 db = table 2718 table = self._parse_table_part(schema=schema) or "" 2719 2720 if not table: 2721 self.raise_error(f"Expected table name but got {self._curr}") 2722 2723 return self.expression( 2724 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2725 ) 2726 2727 def _parse_table( 2728 self, 2729 schema: bool = False, 2730 joins: bool = False, 2731 alias_tokens: t.Optional[t.Collection[TokenType]] = 
None, 2732 parse_bracket: bool = False, 2733 ) -> t.Optional[exp.Expression]: 2734 lateral = self._parse_lateral() 2735 if lateral: 2736 return lateral 2737 2738 unnest = self._parse_unnest() 2739 if unnest: 2740 return unnest 2741 2742 values = self._parse_derived_table_values() 2743 if values: 2744 return values 2745 2746 subquery = self._parse_select(table=True) 2747 if subquery: 2748 if not subquery.args.get("pivots"): 2749 subquery.set("pivots", self._parse_pivots()) 2750 return subquery 2751 2752 bracket = parse_bracket and self._parse_bracket(None) 2753 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2754 this = t.cast( 2755 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2756 ) 2757 2758 if schema: 2759 return self._parse_schema(this=this) 2760 2761 version = self._parse_version() 2762 2763 if version: 2764 this.set("version", version) 2765 2766 if self.dialect.ALIAS_POST_TABLESAMPLE: 2767 table_sample = self._parse_table_sample() 2768 2769 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2770 if alias: 2771 this.set("alias", alias) 2772 2773 if self._match_text_seq("AT"): 2774 this.set("index", self._parse_id_var()) 2775 2776 this.set("hints", self._parse_table_hints()) 2777 2778 if not this.args.get("pivots"): 2779 this.set("pivots", self._parse_pivots()) 2780 2781 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2782 table_sample = self._parse_table_sample() 2783 2784 if table_sample: 2785 table_sample.set("this", this) 2786 this = table_sample 2787 2788 if joins: 2789 for join in iter(self._parse_join, None): 2790 this.append("joins", join) 2791 2792 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2793 this.set("ordinality", True) 2794 this.set("alias", self._parse_table_alias()) 2795 2796 return this 2797 2798 def _parse_version(self) -> t.Optional[exp.Version]: 2799 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2800 this = "TIMESTAMP" 2801 elif 
self._match(TokenType.VERSION_SNAPSHOT): 2802 this = "VERSION" 2803 else: 2804 return None 2805 2806 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2807 kind = self._prev.text.upper() 2808 start = self._parse_bitwise() 2809 self._match_texts(("TO", "AND")) 2810 end = self._parse_bitwise() 2811 expression: t.Optional[exp.Expression] = self.expression( 2812 exp.Tuple, expressions=[start, end] 2813 ) 2814 elif self._match_text_seq("CONTAINED", "IN"): 2815 kind = "CONTAINED IN" 2816 expression = self.expression( 2817 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2818 ) 2819 elif self._match(TokenType.ALL): 2820 kind = "ALL" 2821 expression = None 2822 else: 2823 self._match_text_seq("AS", "OF") 2824 kind = "AS OF" 2825 expression = self._parse_type() 2826 2827 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2828 2829 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2830 if not self._match(TokenType.UNNEST): 2831 return None 2832 2833 expressions = self._parse_wrapped_csv(self._parse_equality) 2834 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2835 2836 alias = self._parse_table_alias() if with_alias else None 2837 2838 if alias: 2839 if self.dialect.UNNEST_COLUMN_ONLY: 2840 if alias.args.get("columns"): 2841 self.raise_error("Unexpected extra column alias in unnest.") 2842 2843 alias.set("columns", [alias.this]) 2844 alias.set("this", None) 2845 2846 columns = alias.args.get("columns") or [] 2847 if offset and len(expressions) < len(columns): 2848 offset = columns.pop() 2849 2850 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2851 self._match(TokenType.ALIAS) 2852 offset = self._parse_id_var( 2853 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2854 ) or exp.to_identifier("offset") 2855 2856 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2857 2858 def _parse_derived_table_values(self) -> 
t.Optional[exp.Values]: 2859 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2860 if not is_derived and not self._match(TokenType.VALUES): 2861 return None 2862 2863 expressions = self._parse_csv(self._parse_value) 2864 alias = self._parse_table_alias() 2865 2866 if is_derived: 2867 self._match_r_paren() 2868 2869 return self.expression( 2870 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2871 ) 2872 2873 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2874 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2875 as_modifier and self._match_text_seq("USING", "SAMPLE") 2876 ): 2877 return None 2878 2879 bucket_numerator = None 2880 bucket_denominator = None 2881 bucket_field = None 2882 percent = None 2883 rows = None 2884 size = None 2885 seed = None 2886 2887 kind = ( 2888 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2889 ) 2890 method = self._parse_var(tokens=(TokenType.ROW,)) 2891 2892 matched_l_paren = self._match(TokenType.L_PAREN) 2893 2894 if self.TABLESAMPLE_CSV: 2895 num = None 2896 expressions = self._parse_csv(self._parse_primary) 2897 else: 2898 expressions = None 2899 num = ( 2900 self._parse_factor() 2901 if self._match(TokenType.NUMBER, advance=False) 2902 else self._parse_primary() or self._parse_placeholder() 2903 ) 2904 2905 if self._match_text_seq("BUCKET"): 2906 bucket_numerator = self._parse_number() 2907 self._match_text_seq("OUT", "OF") 2908 bucket_denominator = bucket_denominator = self._parse_number() 2909 self._match(TokenType.ON) 2910 bucket_field = self._parse_field() 2911 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2912 percent = num 2913 elif self._match(TokenType.ROWS): 2914 rows = num 2915 elif num: 2916 size = num 2917 2918 if matched_l_paren: 2919 self._match_r_paren() 2920 2921 if self._match(TokenType.L_PAREN): 2922 method = self._parse_var() 2923 seed = self._match(TokenType.COMMA) 
and self._parse_number() 2924 self._match_r_paren() 2925 elif self._match_texts(("SEED", "REPEATABLE")): 2926 seed = self._parse_wrapped(self._parse_number) 2927 2928 return self.expression( 2929 exp.TableSample, 2930 expressions=expressions, 2931 method=method, 2932 bucket_numerator=bucket_numerator, 2933 bucket_denominator=bucket_denominator, 2934 bucket_field=bucket_field, 2935 percent=percent, 2936 rows=rows, 2937 size=size, 2938 seed=seed, 2939 kind=kind, 2940 ) 2941 2942 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2943 return list(iter(self._parse_pivot, None)) or None 2944 2945 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2946 return list(iter(self._parse_join, None)) or None 2947 2948 # https://duckdb.org/docs/sql/statements/pivot 2949 def _parse_simplified_pivot(self) -> exp.Pivot: 2950 def _parse_on() -> t.Optional[exp.Expression]: 2951 this = self._parse_bitwise() 2952 return self._parse_in(this) if self._match(TokenType.IN) else this 2953 2954 this = self._parse_table() 2955 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2956 using = self._match(TokenType.USING) and self._parse_csv( 2957 lambda: self._parse_alias(self._parse_function()) 2958 ) 2959 group = self._parse_group() 2960 return self.expression( 2961 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2962 ) 2963 2964 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2965 index = self._index 2966 include_nulls = None 2967 2968 if self._match(TokenType.PIVOT): 2969 unpivot = False 2970 elif self._match(TokenType.UNPIVOT): 2971 unpivot = True 2972 2973 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2974 if self._match_text_seq("INCLUDE", "NULLS"): 2975 include_nulls = True 2976 elif self._match_text_seq("EXCLUDE", "NULLS"): 2977 include_nulls = False 2978 else: 2979 return None 2980 2981 expressions = [] 2982 field = None 2983 2984 if not self._match(TokenType.L_PAREN): 2985 
self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize output column names by combining each aggregation name
            # with each IN-field value; PREFIXED_PIVOT_COLUMNS controls ordering
            # and IDENTIFY_PIVOT_STRINGS selects raw SQL over the alias/name.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return each aggregation's alias; used to name the pivot's output columns."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into exp.Where, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) ->
t.Optional[exp.Group]: 3049 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3050 return None 3051 3052 elements = defaultdict(list) 3053 3054 if self._match(TokenType.ALL): 3055 return self.expression(exp.Group, all=True) 3056 3057 while True: 3058 expressions = self._parse_csv(self._parse_conjunction) 3059 if expressions: 3060 elements["expressions"].extend(expressions) 3061 3062 grouping_sets = self._parse_grouping_sets() 3063 if grouping_sets: 3064 elements["grouping_sets"].extend(grouping_sets) 3065 3066 rollup = None 3067 cube = None 3068 totals = None 3069 3070 index = self._index 3071 with_ = self._match(TokenType.WITH) 3072 if self._match(TokenType.ROLLUP): 3073 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3074 elements["rollup"].extend(ensure_list(rollup)) 3075 3076 if self._match(TokenType.CUBE): 3077 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3078 elements["cube"].extend(ensure_list(cube)) 3079 3080 if self._match_text_seq("TOTALS"): 3081 totals = True 3082 elements["totals"] = True # type: ignore 3083 3084 if not (grouping_sets or rollup or cube or totals): 3085 if with_: 3086 self._retreat(index) 3087 break 3088 3089 return self.expression(exp.Group, **elements) # type: ignore 3090 3091 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3092 if not self._match(TokenType.GROUPING_SETS): 3093 return None 3094 3095 return self._parse_wrapped_csv(self._parse_grouping_set) 3096 3097 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3098 if self._match(TokenType.L_PAREN): 3099 grouping_set = self._parse_csv(self._parse_column) 3100 self._match_r_paren() 3101 return self.expression(exp.Tuple, expressions=grouping_set) 3102 3103 return self._parse_column() 3104 3105 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3106 if not skip_having_token and not self._match(TokenType.HAVING): 3107 return None 3108 return self.expression(exp.Having, 
this=self._parse_conjunction()) 3109 3110 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3111 if not self._match(TokenType.QUALIFY): 3112 return None 3113 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3114 3115 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3116 if skip_start_token: 3117 start = None 3118 elif self._match(TokenType.START_WITH): 3119 start = self._parse_conjunction() 3120 else: 3121 return None 3122 3123 self._match(TokenType.CONNECT_BY) 3124 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3125 exp.Prior, this=self._parse_bitwise() 3126 ) 3127 connect = self._parse_conjunction() 3128 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3129 3130 if not start and self._match(TokenType.START_WITH): 3131 start = self._parse_conjunction() 3132 3133 return self.expression(exp.Connect, start=start, connect=connect) 3134 3135 def _parse_order( 3136 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3137 ) -> t.Optional[exp.Expression]: 3138 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3139 return this 3140 3141 return self.expression( 3142 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3143 ) 3144 3145 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3146 if not self._match(token): 3147 return None 3148 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3149 3150 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3151 this = parse_method() if parse_method else self._parse_conjunction() 3152 3153 asc = self._match(TokenType.ASC) 3154 desc = self._match(TokenType.DESC) or (asc and False) 3155 3156 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3157 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3158 3159 nulls_first = is_nulls_first or False 3160 explicitly_null_ordered = is_nulls_first or 
is_nulls_last 3161 3162 if ( 3163 not explicitly_null_ordered 3164 and ( 3165 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3166 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3167 ) 3168 and self.dialect.NULL_ORDERING != "nulls_are_last" 3169 ): 3170 nulls_first = True 3171 3172 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3173 3174 def _parse_limit( 3175 self, this: t.Optional[exp.Expression] = None, top: bool = False 3176 ) -> t.Optional[exp.Expression]: 3177 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3178 comments = self._prev_comments 3179 if top: 3180 limit_paren = self._match(TokenType.L_PAREN) 3181 expression = self._parse_term() if limit_paren else self._parse_number() 3182 3183 if limit_paren: 3184 self._match_r_paren() 3185 else: 3186 expression = self._parse_term() 3187 3188 if self._match(TokenType.COMMA): 3189 offset = expression 3190 expression = self._parse_term() 3191 else: 3192 offset = None 3193 3194 limit_exp = self.expression( 3195 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3196 ) 3197 3198 return limit_exp 3199 3200 if self._match(TokenType.FETCH): 3201 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3202 direction = self._prev.text if direction else "FIRST" 3203 3204 count = self._parse_field(tokens=self.FETCH_TOKENS) 3205 percent = self._match(TokenType.PERCENT) 3206 3207 self._match_set((TokenType.ROW, TokenType.ROWS)) 3208 3209 only = self._match_text_seq("ONLY") 3210 with_ties = self._match_text_seq("WITH", "TIES") 3211 3212 if only and with_ties: 3213 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3214 3215 return self.expression( 3216 exp.Fetch, 3217 direction=direction, 3218 count=count, 3219 percent=percent, 3220 with_ties=with_ties, 3221 ) 3222 3223 return this 3224 3225 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3226 if 
not self._match(TokenType.OFFSET): 3227 return this 3228 3229 count = self._parse_term() 3230 self._match_set((TokenType.ROW, TokenType.ROWS)) 3231 return self.expression(exp.Offset, this=this, expression=count) 3232 3233 def _parse_locks(self) -> t.List[exp.Lock]: 3234 locks = [] 3235 while True: 3236 if self._match_text_seq("FOR", "UPDATE"): 3237 update = True 3238 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3239 "LOCK", "IN", "SHARE", "MODE" 3240 ): 3241 update = False 3242 else: 3243 break 3244 3245 expressions = None 3246 if self._match_text_seq("OF"): 3247 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3248 3249 wait: t.Optional[bool | exp.Expression] = None 3250 if self._match_text_seq("NOWAIT"): 3251 wait = True 3252 elif self._match_text_seq("WAIT"): 3253 wait = self._parse_primary() 3254 elif self._match_text_seq("SKIP", "LOCKED"): 3255 wait = False 3256 3257 locks.append( 3258 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3259 ) 3260 3261 return locks 3262 3263 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3264 if not self._match_set(self.SET_OPERATIONS): 3265 return this 3266 3267 token_type = self._prev.token_type 3268 3269 if token_type == TokenType.UNION: 3270 expression = exp.Union 3271 elif token_type == TokenType.EXCEPT: 3272 expression = exp.Except 3273 else: 3274 expression = exp.Intersect 3275 3276 return self.expression( 3277 expression, 3278 comments=self._prev.comments, 3279 this=this, 3280 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3281 by_name=self._match_text_seq("BY", "NAME"), 3282 expression=self._parse_set_operations(self._parse_select(nested=True)), 3283 ) 3284 3285 def _parse_expression(self) -> t.Optional[exp.Expression]: 3286 return self._parse_alias(self._parse_conjunction()) 3287 3288 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3289 return 
self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3290 3291 def _parse_equality(self) -> t.Optional[exp.Expression]: 3292 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3293 3294 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3295 return self._parse_tokens(self._parse_range, self.COMPARISON) 3296 3297 def _parse_range(self) -> t.Optional[exp.Expression]: 3298 this = self._parse_bitwise() 3299 negate = self._match(TokenType.NOT) 3300 3301 if self._match_set(self.RANGE_PARSERS): 3302 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3303 if not expression: 3304 return this 3305 3306 this = expression 3307 elif self._match(TokenType.ISNULL): 3308 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3309 3310 # Postgres supports ISNULL and NOTNULL for conditions. 3311 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3312 if self._match(TokenType.NOTNULL): 3313 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3314 this = self.expression(exp.Not, this=this) 3315 3316 if negate: 3317 this = self.expression(exp.Not, this=this) 3318 3319 if self._match(TokenType.IS): 3320 this = self._parse_is(this) 3321 3322 return this 3323 3324 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3325 index = self._index - 1 3326 negate = self._match(TokenType.NOT) 3327 3328 if self._match_text_seq("DISTINCT", "FROM"): 3329 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3330 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3331 3332 expression = self._parse_null() or self._parse_boolean() 3333 if not expression: 3334 self._retreat(index) 3335 return None 3336 3337 this = self.expression(exp.Is, this=this, expression=expression) 3338 return self.expression(exp.Not, this=this) if negate else this 3339 3340 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3341 unnest = 
self._parse_unnest(with_alias=False) 3342 if unnest: 3343 this = self.expression(exp.In, this=this, unnest=unnest) 3344 elif self._match(TokenType.L_PAREN): 3345 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3346 3347 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3348 this = self.expression(exp.In, this=this, query=expressions[0]) 3349 else: 3350 this = self.expression(exp.In, this=this, expressions=expressions) 3351 3352 self._match_r_paren(this) 3353 else: 3354 this = self.expression(exp.In, this=this, field=self._parse_field()) 3355 3356 return this 3357 3358 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3359 low = self._parse_bitwise() 3360 self._match(TokenType.AND) 3361 high = self._parse_bitwise() 3362 return self.expression(exp.Between, this=this, low=low, high=high) 3363 3364 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3365 if not self._match(TokenType.ESCAPE): 3366 return this 3367 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3368 3369 def _parse_interval(self) -> t.Optional[exp.Interval]: 3370 index = self._index 3371 3372 if not self._match(TokenType.INTERVAL): 3373 return None 3374 3375 if self._match(TokenType.STRING, advance=False): 3376 this = self._parse_primary() 3377 else: 3378 this = self._parse_term() 3379 3380 if not this: 3381 self._retreat(index) 3382 return None 3383 3384 unit = self._parse_function() or self._parse_var(any_token=True) 3385 3386 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3387 # each INTERVAL expression into this canonical form so it's easy to transpile 3388 if this and this.is_number: 3389 this = exp.Literal.string(this.name) 3390 elif this and this.is_string: 3391 parts = this.name.split() 3392 3393 if len(parts) == 2: 3394 if unit: 3395 # This is not actually a unit, it's something else (e.g. 
a "window side") 3396 unit = None 3397 self._retreat(self._index - 1) 3398 3399 this = exp.Literal.string(parts[0]) 3400 unit = self.expression(exp.Var, this=parts[1]) 3401 3402 return self.expression(exp.Interval, this=this, unit=unit) 3403 3404 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3405 this = self._parse_term() 3406 3407 while True: 3408 if self._match_set(self.BITWISE): 3409 this = self.expression( 3410 self.BITWISE[self._prev.token_type], 3411 this=this, 3412 expression=self._parse_term(), 3413 ) 3414 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3415 this = self.expression( 3416 exp.DPipe, 3417 this=this, 3418 expression=self._parse_term(), 3419 safe=not self.dialect.STRICT_STRING_CONCAT, 3420 ) 3421 elif self._match(TokenType.DQMARK): 3422 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3423 elif self._match_pair(TokenType.LT, TokenType.LT): 3424 this = self.expression( 3425 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3426 ) 3427 elif self._match_pair(TokenType.GT, TokenType.GT): 3428 this = self.expression( 3429 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3430 ) 3431 else: 3432 break 3433 3434 return this 3435 3436 def _parse_term(self) -> t.Optional[exp.Expression]: 3437 return self._parse_tokens(self._parse_factor, self.TERM) 3438 3439 def _parse_factor(self) -> t.Optional[exp.Expression]: 3440 if self.EXPONENT: 3441 factor = self._parse_tokens(self._parse_exponent, self.FACTOR) 3442 else: 3443 factor = self._parse_tokens(self._parse_unary, self.FACTOR) 3444 if isinstance(factor, exp.Div): 3445 factor.args["typed"] = self.dialect.TYPED_DIVISION 3446 factor.args["safe"] = self.dialect.SAFE_DIVISION 3447 return factor 3448 3449 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3450 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3451 3452 def _parse_unary(self) -> t.Optional[exp.Expression]: 3453 if 
self._match_set(self.UNARY_PARSERS): 3454 return self.UNARY_PARSERS[self._prev.token_type](self) 3455 return self._parse_at_time_zone(self._parse_type()) 3456 3457 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3458 interval = parse_interval and self._parse_interval() 3459 if interval: 3460 return interval 3461 3462 index = self._index 3463 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3464 this = self._parse_column() 3465 3466 if data_type: 3467 if isinstance(this, exp.Literal): 3468 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3469 if parser: 3470 return parser(self, this, data_type) 3471 return self.expression(exp.Cast, this=this, to=data_type) 3472 if not data_type.expressions: 3473 self._retreat(index) 3474 return self._parse_column() 3475 return self._parse_column_ops(data_type) 3476 3477 return this and self._parse_column_ops(this) 3478 3479 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3480 this = self._parse_type() 3481 if not this: 3482 return None 3483 3484 return self.expression( 3485 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3486 ) 3487 3488 def _parse_types( 3489 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3490 ) -> t.Optional[exp.Expression]: 3491 index = self._index 3492 3493 prefix = self._match_text_seq("SYSUDTLIB", ".") 3494 3495 if not self._match_set(self.TYPE_TOKENS): 3496 identifier = allow_identifiers and self._parse_id_var( 3497 any_token=False, tokens=(TokenType.VAR,) 3498 ) 3499 3500 if identifier: 3501 tokens = self.dialect.tokenize(identifier.name) 3502 3503 if len(tokens) != 1: 3504 self.raise_error("Unexpected identifier", self._prev) 3505 3506 if tokens[0].token_type in self.TYPE_TOKENS: 3507 self._prev = tokens[0] 3508 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3509 type_name = identifier.name 3510 3511 while self._match(TokenType.DOT): 3512 type_name = 
f"{type_name}.{self._advance_any() and self._prev.text}" 3513 3514 return exp.DataType.build(type_name, udt=True) 3515 else: 3516 return None 3517 else: 3518 return None 3519 3520 type_token = self._prev.token_type 3521 3522 if type_token == TokenType.PSEUDO_TYPE: 3523 return self.expression(exp.PseudoType, this=self._prev.text) 3524 3525 if type_token == TokenType.OBJECT_IDENTIFIER: 3526 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3527 3528 nested = type_token in self.NESTED_TYPE_TOKENS 3529 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3530 expressions = None 3531 maybe_func = False 3532 3533 if self._match(TokenType.L_PAREN): 3534 if is_struct: 3535 expressions = self._parse_csv(self._parse_struct_types) 3536 elif nested: 3537 expressions = self._parse_csv( 3538 lambda: self._parse_types( 3539 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3540 ) 3541 ) 3542 elif type_token in self.ENUM_TYPE_TOKENS: 3543 expressions = self._parse_csv(self._parse_equality) 3544 else: 3545 expressions = self._parse_csv(self._parse_type_size) 3546 3547 if not expressions or not self._match(TokenType.R_PAREN): 3548 self._retreat(index) 3549 return None 3550 3551 maybe_func = True 3552 3553 this: t.Optional[exp.Expression] = None 3554 values: t.Optional[t.List[exp.Expression]] = None 3555 3556 if nested and self._match(TokenType.LT): 3557 if is_struct: 3558 expressions = self._parse_csv(self._parse_struct_types) 3559 else: 3560 expressions = self._parse_csv( 3561 lambda: self._parse_types( 3562 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3563 ) 3564 ) 3565 3566 if not self._match(TokenType.GT): 3567 self.raise_error("Expecting >") 3568 3569 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3570 values = self._parse_csv(self._parse_conjunction) 3571 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3572 3573 if type_token in self.TIMESTAMPS: 3574 if self._match_text_seq("WITH", 
"TIME", "ZONE"): 3575 maybe_func = False 3576 tz_type = ( 3577 exp.DataType.Type.TIMETZ 3578 if type_token in self.TIMES 3579 else exp.DataType.Type.TIMESTAMPTZ 3580 ) 3581 this = exp.DataType(this=tz_type, expressions=expressions) 3582 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3583 maybe_func = False 3584 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3585 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3586 maybe_func = False 3587 elif type_token == TokenType.INTERVAL: 3588 unit = self._parse_var() 3589 3590 if self._match_text_seq("TO"): 3591 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3592 else: 3593 span = None 3594 3595 if span or not unit: 3596 this = self.expression( 3597 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3598 ) 3599 else: 3600 this = self.expression(exp.Interval, unit=unit) 3601 3602 if maybe_func and check_func: 3603 index2 = self._index 3604 peek = self._parse_string() 3605 3606 if not peek: 3607 self._retreat(index) 3608 return None 3609 3610 self._retreat(index2) 3611 3612 if not this: 3613 if self._match_text_seq("UNSIGNED"): 3614 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3615 if not unsigned_type_token: 3616 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3617 3618 type_token = unsigned_type_token or type_token 3619 3620 this = exp.DataType( 3621 this=exp.DataType.Type[type_token.value], 3622 expressions=expressions, 3623 nested=nested, 3624 values=values, 3625 prefix=prefix, 3626 ) 3627 3628 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3629 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3630 3631 return this 3632 3633 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3634 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3635 self._match(TokenType.COLON) 3636 return self._parse_column_def(this) 3637 
    # Wraps `this` in AT TIME ZONE <expr> when that suffix follows.
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    # Parses a column reference: a field promoted to Column when it's a bare
    # identifier, then any trailing column operators (dots, ::, brackets).
    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    # Applies postfix column operators in a loop: bracket indexing, :: casts,
    # dialect-specific operators, and dotted paths (db.table.column, x.y.fn(...)).
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the dotted path one level: previous parts become
                # table/db/catalog qualifiers of the new column.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    # Parses primary expressions: literals (with adjacent-string concatenation),
    # leading-dot decimals like `.5`, and parenthesized expressions/subqueries.
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    # A field is a primary expression, a function call, or an identifier —
    # tried in that order.
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    # Parses an actual function invocation: no-paren keywords, registered custom
    # parsers, subquery predicates (EXISTS/ANY/...), known functions from the
    # FUNCTIONS registry, or a fall-through exp.Anonymous call.
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Skip past the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some registered builders take the dialect; inspect the signature.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    # A function parameter is an identifier with an optional column definition.
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    # Parses a (possibly dotted) UDF name plus its parenthesized parameter list.
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    # An introducer (e.g. charset marker) applies to the literal that follows;
    # without one it degrades to a plain identifier.
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    # Parses a session parameter reference, optionally qualified as <kind>.<name>.
    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    # Parses a lambda argument: either a real lambda ((x, y) -> ...), a DISTINCT
    # aggregation argument, or a plain select/expression, with optional
    # ORDER BY / LIMIT / IGNORE NULLS suffixes.
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    # Parses a parenthesized schema (column/constraint list). First speculatively
    # tries a nested SELECT — if one parses, this isn't a schema.
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Discard errors from the speculative parse and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    # Parses a column definition: name, optional type, computed/transform
    # expressions, and any trailing column constraints.
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Typeless `name AS <expr>` computed column.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    # Parses AUTO_INCREMENT-style constraints, promoting to a generated-identity
    # constraint when both start and increment are given.
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # Only when both values were supplied do we emit the richer identity node.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint: either a wrapped value list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [BY DEFAULT | ALWAYS] AS { IDENTITY (...) | ROW ... | (<expr>) }."""
        if self._match_text_seq("BY", "DEFAULT"):
            # GENERATED BY DEFAULT [ON NULL] AS IDENTITY
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        # GENERATED ... AS ROW {START | END} [HIDDEN] (system-versioned tables)
        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options: START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>): a computed expression, not an identity.
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Shorthand numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        # Called after NOT was consumed; dispatch on what follows it.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, or return None."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # NOTE: if only a name was parsed (no known constraint kind), the bare
        # identifier is returned as-is.
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed constraints fall through to the
        schema-level unnamed-constraint parsers."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint can carry several constraint bodies / function calls.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the registered parser for the next constraint keyword, if any."""
        # Quoted identifiers can never start a constraint keyword.
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions etc.)
        as plain strings, until an unrecognized token is reached."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (e.g. DELETE / UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [options] clause.

        Args:
            match: when True, require (and consume) the REFERENCES keyword first.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # kind is "delete" or "update" and becomes a kwarg of exp.ForeignKey below.
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fall back to taking the next token verbatim (e.g. CASCADE, RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Hook for dialects that allow more than a bare column in PRIMARY KEY (...).
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col)."""
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint (no column list) or a
        table constraint (with a wrapped column list and options)."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: subscript/slice `[...]` or brace struct `{...}`.

        Recurses so chained subscripts (a[1][2]) are handled.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:2]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts to the dialect's index base.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Turn `<expr> : <expr>` into a Slice; otherwise pass through.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call IF(...) or the statement-like
        IF <cond> THEN <expr> [ELSE <expr>] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind to before the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)]; NEXT was already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) or the comma-separated variant."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # NOTE(review): this comparison is case-sensitive while _match_texts is
            # not, so a lowercase `having min/max` would set is_max=False — confirm.
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>]).

        Args:
            strict: emit exp.Cast when True, exp.TryCast otherwise.
            safe: forwarded to the resulting node's `safe` arg.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(<expr>, '<type string>') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal CAST ... FORMAT is rewritten into STR_TO_DATE / STR_TO_TIME
                # with the format translated through the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse the interior of CONVERT(<expr> USING <charset> | <expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a dangling last arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL,
                # mirroring DECODE's NULL-equals-NULL semantics.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a JSON_OBJECT entry: [KEY] <key> { : | , } [VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap `this` when it is followed by a FORMAT JSON modifier.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...) including its trailing modifiers."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED [PATH] '<path>' COLUMNS (...) has no own name/type.
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema clause of JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of JSON_TABLE(<expr> [, <path>] [handlers] COLUMNS ...)."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('expr' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) schema: <name> <type> ['<path>'] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments, normalizing the needle/haystack
        order based on the calling dialect's convention."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL <table>, TABLE <table> [, <params>]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Hint "functions" take a list of table references as arguments.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING / TRAILING / BOTH position keyword.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects) the first expression parsed is
            # the trim characters and the second is the target string — swap them.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when the modifier is present."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the post-function window suffix: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...), or a named WINDOW definition.

        Args:
            this: the function expression the window applies to.
            alias: True when parsing a named WINDOW clause entry (name AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent) keyword follows: no window to parse.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # Oracle FIRST/LAST modifier inside the window specification.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] <spec> [AND <spec>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        # Hook so dialects can customize PARTITION BY / ORDER BY parsing in windows.
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary, returning its value and side (PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or wrapped alias list) after `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases: <expr> AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; with any_token=True nearly any token
        (except reserved ones) is accepted as a name."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used in name position becomes a quoted identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a bare keyword/word into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return the next token unless it's reserved.
        if self._curr and self._curr.token_type not in self.RESERVED_TOKENS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped and with a
        `:`-separated second part."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind on a failed attempt."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The parser declined: undo the token we consumed above.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * EXCEPT (<cols>) / EXCEPT <col>."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * REPLACE (<exprs>) / REPLACE <expr>."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments preceding the separator to the item before it.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from `expressions` (a token ->
        expression-class mapping) over operands produced by `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional=True` permits their absence."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Used where either a subquery or a plain (possibly aliased) expression may appear.
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT as it appears inside DDL (e.g. CREATE TABLE ... AS SELECT).
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction |
exp.Command: 5023 this = None 5024 if self._match_texts(self.TRANSACTION_KIND): 5025 this = self._prev.text 5026 5027 self._match_texts(("TRANSACTION", "WORK")) 5028 5029 modes = [] 5030 while True: 5031 mode = [] 5032 while self._match(TokenType.VAR): 5033 mode.append(self._prev.text) 5034 5035 if mode: 5036 modes.append(" ".join(mode)) 5037 if not self._match(TokenType.COMMA): 5038 break 5039 5040 return self.expression(exp.Transaction, this=this, modes=modes) 5041 5042 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5043 chain = None 5044 savepoint = None 5045 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5046 5047 self._match_texts(("TRANSACTION", "WORK")) 5048 5049 if self._match_text_seq("TO"): 5050 self._match_text_seq("SAVEPOINT") 5051 savepoint = self._parse_id_var() 5052 5053 if self._match(TokenType.AND): 5054 chain = not self._match_text_seq("NO") 5055 self._match_text_seq("CHAIN") 5056 5057 if is_rollback: 5058 return self.expression(exp.Rollback, savepoint=savepoint) 5059 5060 return self.expression(exp.Commit, chain=chain) 5061 5062 def _parse_refresh(self) -> exp.Refresh: 5063 self._match(TokenType.TABLE) 5064 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5065 5066 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5067 if not self._match_text_seq("ADD"): 5068 return None 5069 5070 self._match(TokenType.COLUMN) 5071 exists_column = self._parse_exists(not_=True) 5072 expression = self._parse_field_def() 5073 5074 if expression: 5075 expression.set("exists", exists_column) 5076 5077 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5078 if self._match_texts(("FIRST", "AFTER")): 5079 position = self._prev.text 5080 column_position = self.expression( 5081 exp.ColumnPosition, this=self._parse_column(), position=position 5082 ) 5083 expression.set("position", column_position) 5084 5085 return expression 5086 5087 def 
_parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5088 drop = self._match(TokenType.DROP) and self._parse_drop() 5089 if drop and not isinstance(drop, exp.Command): 5090 drop.set("kind", drop.args.get("kind", "COLUMN")) 5091 return drop 5092 5093 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5094 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5095 return self.expression( 5096 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5097 ) 5098 5099 def _parse_add_constraint(self) -> exp.AddConstraint: 5100 this = None 5101 kind = self._prev.token_type 5102 5103 if kind == TokenType.CONSTRAINT: 5104 this = self._parse_id_var() 5105 5106 if self._match_text_seq("CHECK"): 5107 expression = self._parse_wrapped(self._parse_conjunction) 5108 enforced = self._match_text_seq("ENFORCED") 5109 5110 return self.expression( 5111 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5112 ) 5113 5114 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5115 expression = self._parse_foreign_key() 5116 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5117 expression = self._parse_primary_key() 5118 else: 5119 expression = None 5120 5121 return self.expression(exp.AddConstraint, this=this, expression=expression) 5122 5123 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5124 index = self._index - 1 5125 5126 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5127 return self._parse_csv(self._parse_add_constraint) 5128 5129 self._retreat(index) 5130 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5131 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5132 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5133 5134 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5135 self._match(TokenType.COLUMN) 5136 column = 
self._parse_field(any_token=True) 5137 5138 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5139 return self.expression(exp.AlterColumn, this=column, drop=True) 5140 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5141 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5142 5143 self._match_text_seq("SET", "DATA") 5144 return self.expression( 5145 exp.AlterColumn, 5146 this=column, 5147 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5148 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5149 using=self._match(TokenType.USING) and self._parse_conjunction(), 5150 ) 5151 5152 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5153 index = self._index - 1 5154 5155 partition_exists = self._parse_exists() 5156 if self._match(TokenType.PARTITION, advance=False): 5157 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5158 5159 self._retreat(index) 5160 return self._parse_csv(self._parse_drop_column) 5161 5162 def _parse_alter_table_rename(self) -> exp.RenameTable: 5163 self._match_text_seq("TO") 5164 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5165 5166 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5167 start = self._prev 5168 5169 if not self._match(TokenType.TABLE): 5170 return self._parse_as_command(start) 5171 5172 exists = self._parse_exists() 5173 only = self._match_text_seq("ONLY") 5174 this = self._parse_table(schema=True) 5175 5176 if self._next: 5177 self._advance() 5178 5179 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5180 if parser: 5181 actions = ensure_list(parser(self)) 5182 5183 if not self._curr: 5184 return self.expression( 5185 exp.AlterTable, 5186 this=this, 5187 exists=exists, 5188 actions=actions, 5189 only=only, 5190 ) 5191 5192 return self._parse_as_command(start) 5193 5194 def _parse_merge(self) -> exp.Merge: 5195 self._match(TokenType.INTO) 5196 
target = self._parse_table() 5197 5198 if target and self._match(TokenType.ALIAS, advance=False): 5199 target.set("alias", self._parse_table_alias()) 5200 5201 self._match(TokenType.USING) 5202 using = self._parse_table() 5203 5204 self._match(TokenType.ON) 5205 on = self._parse_conjunction() 5206 5207 return self.expression( 5208 exp.Merge, 5209 this=target, 5210 using=using, 5211 on=on, 5212 expressions=self._parse_when_matched(), 5213 ) 5214 5215 def _parse_when_matched(self) -> t.List[exp.When]: 5216 whens = [] 5217 5218 while self._match(TokenType.WHEN): 5219 matched = not self._match(TokenType.NOT) 5220 self._match_text_seq("MATCHED") 5221 source = ( 5222 False 5223 if self._match_text_seq("BY", "TARGET") 5224 else self._match_text_seq("BY", "SOURCE") 5225 ) 5226 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5227 5228 self._match(TokenType.THEN) 5229 5230 if self._match(TokenType.INSERT): 5231 _this = self._parse_star() 5232 if _this: 5233 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5234 else: 5235 then = self.expression( 5236 exp.Insert, 5237 this=self._parse_value(), 5238 expression=self._match(TokenType.VALUES) and self._parse_value(), 5239 ) 5240 elif self._match(TokenType.UPDATE): 5241 expressions = self._parse_star() 5242 if expressions: 5243 then = self.expression(exp.Update, expressions=expressions) 5244 else: 5245 then = self.expression( 5246 exp.Update, 5247 expressions=self._match(TokenType.SET) 5248 and self._parse_csv(self._parse_equality), 5249 ) 5250 elif self._match(TokenType.DELETE): 5251 then = self.expression(exp.Var, this=self._prev.text) 5252 else: 5253 then = None 5254 5255 whens.append( 5256 self.expression( 5257 exp.When, 5258 matched=matched, 5259 source=source, 5260 condition=condition, 5261 then=then, 5262 ) 5263 ) 5264 return whens 5265 5266 def _parse_show(self) -> t.Optional[exp.Expression]: 5267 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5268 if 
parser: 5269 return parser(self) 5270 return self._parse_as_command(self._prev) 5271 5272 def _parse_set_item_assignment( 5273 self, kind: t.Optional[str] = None 5274 ) -> t.Optional[exp.Expression]: 5275 index = self._index 5276 5277 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5278 return self._parse_set_transaction(global_=kind == "GLOBAL") 5279 5280 left = self._parse_primary() or self._parse_id_var() 5281 assignment_delimiter = self._match_texts(("=", "TO")) 5282 5283 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5284 self._retreat(index) 5285 return None 5286 5287 right = self._parse_statement() or self._parse_id_var() 5288 this = self.expression(exp.EQ, this=left, expression=right) 5289 5290 return self.expression(exp.SetItem, this=this, kind=kind) 5291 5292 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5293 self._match_text_seq("TRANSACTION") 5294 characteristics = self._parse_csv( 5295 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5296 ) 5297 return self.expression( 5298 exp.SetItem, 5299 expressions=characteristics, 5300 kind="TRANSACTION", 5301 **{"global": global_}, # type: ignore 5302 ) 5303 5304 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5305 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5306 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5307 5308 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5309 index = self._index 5310 set_ = self.expression( 5311 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5312 ) 5313 5314 if self._curr: 5315 self._retreat(index) 5316 return self._parse_as_command(self._prev) 5317 5318 return set_ 5319 5320 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5321 for option in options: 5322 if self._match_text_seq(*option.split(" ")): 5323 
return exp.var(option) 5324 return None 5325 5326 def _parse_as_command(self, start: Token) -> exp.Command: 5327 while self._curr: 5328 self._advance() 5329 text = self._find_sql(start, self._prev) 5330 size = len(start.text) 5331 return exp.Command(this=text[:size], expression=text[size:]) 5332 5333 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5334 settings = [] 5335 5336 self._match_l_paren() 5337 kind = self._parse_id_var() 5338 5339 if self._match(TokenType.L_PAREN): 5340 while True: 5341 key = self._parse_id_var() 5342 value = self._parse_primary() 5343 5344 if not key and value is None: 5345 break 5346 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5347 self._match(TokenType.R_PAREN) 5348 5349 self._match_r_paren() 5350 5351 return self.expression( 5352 exp.DictProperty, 5353 this=this, 5354 kind=kind.this if kind else None, 5355 settings=settings, 5356 ) 5357 5358 def _parse_dict_range(self, this: str) -> exp.DictRange: 5359 self._match_l_paren() 5360 has_min = self._match_text_seq("MIN") 5361 if has_min: 5362 min = self._parse_var() or self._parse_primary() 5363 self._match_text_seq("MAX") 5364 max = self._parse_var() or self._parse_primary() 5365 else: 5366 max = self._parse_var() or self._parse_primary() 5367 min = exp.Literal.number(0) 5368 self._match_r_paren() 5369 return self.expression(exp.DictRange, this=this, min=min, max=max) 5370 5371 def _parse_comprehension( 5372 self, this: t.Optional[exp.Expression] 5373 ) -> t.Optional[exp.Comprehension]: 5374 index = self._index 5375 expression = self._parse_column() 5376 if not self._match(TokenType.IN): 5377 self._retreat(index - 1) 5378 return None 5379 iterator = self._parse_column() 5380 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5381 return self.expression( 5382 exp.Comprehension, 5383 this=this, 5384 expression=expression, 5385 iterator=iterator, 5386 condition=condition, 5387 ) 5388 5389 def _find_parser( 5390 self, 
parsers: t.Dict[str, t.Callable], trie: t.Dict 5391 ) -> t.Optional[t.Callable]: 5392 if not self._curr: 5393 return None 5394 5395 index = self._index 5396 this = [] 5397 while True: 5398 # The current token might be multiple words 5399 curr = self._curr.text.upper() 5400 key = curr.split(" ") 5401 this.append(curr) 5402 5403 self._advance() 5404 result, trie = in_trie(trie, key) 5405 if result == TrieResult.FAILED: 5406 break 5407 5408 if result == TrieResult.EXISTS: 5409 subparser = parsers[" ".join(this)] 5410 return subparser 5411 5412 self._retreat(index) 5413 return None 5414 5415 def _match(self, token_type, advance=True, expression=None): 5416 if not self._curr: 5417 return None 5418 5419 if self._curr.token_type == token_type: 5420 if advance: 5421 self._advance() 5422 self._add_comments(expression) 5423 return True 5424 5425 return None 5426 5427 def _match_set(self, types, advance=True): 5428 if not self._curr: 5429 return None 5430 5431 if self._curr.token_type in types: 5432 if advance: 5433 self._advance() 5434 return True 5435 5436 return None 5437 5438 def _match_pair(self, token_type_a, token_type_b, advance=True): 5439 if not self._curr or not self._next: 5440 return None 5441 5442 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5443 if advance: 5444 self._advance(2) 5445 return True 5446 5447 return None 5448 5449 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5450 if not self._match(TokenType.L_PAREN, expression=expression): 5451 self.raise_error("Expecting (") 5452 5453 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5454 if not self._match(TokenType.R_PAREN, expression=expression): 5455 self.raise_error("Expecting )") 5456 5457 def _match_texts(self, texts, advance=True): 5458 if self._curr and self._curr.text.upper() in texts: 5459 if advance: 5460 self._advance() 5461 return True 5462 return False 5463 5464 def _match_text_seq(self, 
*texts, advance=True): 5465 index = self._index 5466 for text in texts: 5467 if self._curr and self._curr.text.upper() == text: 5468 self._advance() 5469 else: 5470 self._retreat(index) 5471 return False 5472 5473 if not advance: 5474 self._retreat(index) 5475 5476 return True 5477 5478 @t.overload 5479 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5480 ... 5481 5482 @t.overload 5483 def _replace_columns_with_dots( 5484 self, this: t.Optional[exp.Expression] 5485 ) -> t.Optional[exp.Expression]: 5486 ... 5487 5488 def _replace_columns_with_dots(self, this): 5489 if isinstance(this, exp.Dot): 5490 exp.replace_children(this, self._replace_columns_with_dots) 5491 elif isinstance(this, exp.Column): 5492 exp.replace_children(this, self._replace_columns_with_dots) 5493 table = this.args.get("table") 5494 this = ( 5495 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5496 ) 5497 5498 return this 5499 5500 def _replace_lambda( 5501 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5502 ) -> t.Optional[exp.Expression]: 5503 if not node: 5504 return node 5505 5506 for column in node.find_all(exp.Column): 5507 if column.parts[0].name in lambda_variables: 5508 dot_or_id = column.to_dot() if column.table else column.this 5509 parent = column.parent 5510 5511 while isinstance(parent, exp.Dot): 5512 if not isinstance(parent.parent, exp.Dot): 5513 parent.replace(dot_or_id) 5514 break 5515 parent = parent.parent 5516 else: 5517 if column is node: 5518 node = dot_or_id 5519 else: 5520 column.replace(dot_or_id) 5521 return node 5522 5523 5524def _ensure_string_if_null(values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5525 return [ 5526 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5527 for value in values 5528 if value 5529 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from alternating key/value arguments, or a StarMap for `*`."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Keys sit at even positions, their values immediately after.
    even_indices = range(0, len(args), 2)
    keys = [args[i] for i in even_indices]
    values = [args[i + 1] for i in even_indices]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
def parse_concat(args: t.List, dialect: Dialect) -> t.Optional[exp.Expression]:
    """Build a CONCAT expression, applying the dialect's NULL-handling semantics."""
    if dialect.parser_class.CONCAT_NULL_OUTPUTS_STRING:
        args = _ensure_string_if_null(args)

    # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
    # we find such a call we replace it with its argument.
    if len(args) == 1:
        return args[0]

    is_safe = not dialect.STRICT_STRING_CONCAT
    return exp.Concat(expressions=args, safe=is_safe)
def parse_concat_ws(args: t.List, dialect: Dialect) -> t.Optional[exp.Expression]:
    """Build a CONCAT_WS(delimiter, value, ...) expression."""
    if len(args) < 2:
        # Not enough arguments to split off a delimiter; keep them as-is.
        return exp.ConcatWs(expressions=args)

    delimiter = args[0]
    rest = args[1:]
    if dialect.parser_class.CONCAT_NULL_OUTPUTS_STRING:
        rest = _ensure_string_if_null(rest)

    return exp.ConcatWs(expressions=[delimiter, *rest])
def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG (or LN) expression, honoring the dialect's argument order."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if value:
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=base, expression=value)
        return exp.Log(this=value, expression=base)

    # Single-argument form: some dialects treat LOG(x) as natural log.
    if dialect.parser_class.LOG_DEFAULTS_TO_LN:
        return exp.Ln(this=base)
    return exp.Log(this=base)
97class Parser(metaclass=_Parser): 98 """ 99 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 100 101 Args: 102 error_level: The desired error level. 103 Default: ErrorLevel.IMMEDIATE 104 error_message_context: Determines the amount of context to capture from a 105 query string when displaying the error message (in number of characters). 106 Default: 100 107 max_errors: Maximum number of error messages to include in a raised ParseError. 108 This is only relevant if error_level is ErrorLevel.RAISE. 109 Default: 3 110 """ 111 112 FUNCTIONS: t.Dict[str, t.Callable] = { 113 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 114 "CONCAT": parse_concat, 115 "CONCAT_WS": parse_concat_ws, 116 "DATE_TO_DATE_STR": lambda args: exp.Cast( 117 this=seq_get(args, 0), 118 to=exp.DataType(this=exp.DataType.Type.TEXT), 119 ), 120 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 121 "LIKE": parse_like, 122 "LOG": parse_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": parse_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 
TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 TYPE_TOKENS = { 166 TokenType.BIT, 167 TokenType.BOOLEAN, 168 TokenType.TINYINT, 169 TokenType.UTINYINT, 170 TokenType.SMALLINT, 171 TokenType.USMALLINT, 172 TokenType.INT, 173 TokenType.UINT, 174 TokenType.BIGINT, 175 TokenType.UBIGINT, 176 TokenType.INT128, 177 TokenType.UINT128, 178 TokenType.INT256, 179 TokenType.UINT256, 180 TokenType.MEDIUMINT, 181 TokenType.UMEDIUMINT, 182 TokenType.FIXEDSTRING, 183 TokenType.FLOAT, 184 TokenType.DOUBLE, 185 TokenType.CHAR, 186 TokenType.NCHAR, 187 TokenType.VARCHAR, 188 TokenType.NVARCHAR, 189 TokenType.TEXT, 190 TokenType.MEDIUMTEXT, 191 TokenType.LONGTEXT, 192 TokenType.MEDIUMBLOB, 193 TokenType.LONGBLOB, 194 TokenType.BINARY, 195 TokenType.VARBINARY, 196 TokenType.JSON, 197 TokenType.JSONB, 198 TokenType.INTERVAL, 199 TokenType.TINYBLOB, 200 TokenType.TINYTEXT, 201 TokenType.TIME, 202 TokenType.TIMETZ, 203 TokenType.TIMESTAMP, 204 TokenType.TIMESTAMP_S, 205 TokenType.TIMESTAMP_MS, 206 TokenType.TIMESTAMP_NS, 207 TokenType.TIMESTAMPTZ, 208 TokenType.TIMESTAMPLTZ, 209 TokenType.DATETIME, 210 TokenType.DATETIME64, 211 TokenType.DATE, 212 TokenType.INT4RANGE, 213 TokenType.INT4MULTIRANGE, 214 TokenType.INT8RANGE, 215 TokenType.INT8MULTIRANGE, 216 TokenType.NUMRANGE, 217 TokenType.NUMMULTIRANGE, 218 TokenType.TSRANGE, 219 TokenType.TSMULTIRANGE, 220 TokenType.TSTZRANGE, 221 TokenType.TSTZMULTIRANGE, 222 TokenType.DATERANGE, 223 TokenType.DATEMULTIRANGE, 224 TokenType.DECIMAL, 225 TokenType.UDECIMAL, 226 TokenType.BIGDECIMAL, 227 TokenType.UUID, 228 TokenType.GEOGRAPHY, 229 TokenType.GEOMETRY, 230 TokenType.HLLSKETCH, 231 TokenType.HSTORE, 232 TokenType.PSEUDO_TYPE, 233 TokenType.SUPER, 234 TokenType.SERIAL, 235 TokenType.SMALLSERIAL, 236 TokenType.BIGSERIAL, 237 TokenType.XML, 238 TokenType.YEAR, 239 TokenType.UNIQUEIDENTIFIER, 240 TokenType.USERDEFINED, 241 TokenType.MONEY, 242 TokenType.SMALLMONEY, 243 TokenType.ROWVERSION, 244 TokenType.IMAGE, 245 TokenType.VARIANT, 246 
TokenType.OBJECT, 247 TokenType.OBJECT_IDENTIFIER, 248 TokenType.INET, 249 TokenType.IPADDRESS, 250 TokenType.IPPREFIX, 251 TokenType.UNKNOWN, 252 TokenType.NULL, 253 *ENUM_TYPE_TOKENS, 254 *NESTED_TYPE_TOKENS, 255 } 256 257 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 258 TokenType.BIGINT: TokenType.UBIGINT, 259 TokenType.INT: TokenType.UINT, 260 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 261 TokenType.SMALLINT: TokenType.USMALLINT, 262 TokenType.TINYINT: TokenType.UTINYINT, 263 TokenType.DECIMAL: TokenType.UDECIMAL, 264 } 265 266 SUBQUERY_PREDICATES = { 267 TokenType.ANY: exp.Any, 268 TokenType.ALL: exp.All, 269 TokenType.EXISTS: exp.Exists, 270 TokenType.SOME: exp.Any, 271 } 272 273 RESERVED_TOKENS = { 274 *Tokenizer.SINGLE_TOKENS.values(), 275 TokenType.SELECT, 276 } 277 278 DB_CREATABLES = { 279 TokenType.DATABASE, 280 TokenType.SCHEMA, 281 TokenType.TABLE, 282 TokenType.VIEW, 283 TokenType.MODEL, 284 TokenType.DICTIONARY, 285 } 286 287 CREATABLES = { 288 TokenType.COLUMN, 289 TokenType.CONSTRAINT, 290 TokenType.FUNCTION, 291 TokenType.INDEX, 292 TokenType.PROCEDURE, 293 TokenType.FOREIGN_KEY, 294 *DB_CREATABLES, 295 } 296 297 # Tokens that can represent identifiers 298 ID_VAR_TOKENS = { 299 TokenType.VAR, 300 TokenType.ANTI, 301 TokenType.APPLY, 302 TokenType.ASC, 303 TokenType.AUTO_INCREMENT, 304 TokenType.BEGIN, 305 TokenType.CACHE, 306 TokenType.CASE, 307 TokenType.COLLATE, 308 TokenType.COMMAND, 309 TokenType.COMMENT, 310 TokenType.COMMIT, 311 TokenType.CONSTRAINT, 312 TokenType.DEFAULT, 313 TokenType.DELETE, 314 TokenType.DESC, 315 TokenType.DESCRIBE, 316 TokenType.DICTIONARY, 317 TokenType.DIV, 318 TokenType.END, 319 TokenType.EXECUTE, 320 TokenType.ESCAPE, 321 TokenType.FALSE, 322 TokenType.FIRST, 323 TokenType.FILTER, 324 TokenType.FORMAT, 325 TokenType.FULL, 326 TokenType.IS, 327 TokenType.ISNULL, 328 TokenType.INTERVAL, 329 TokenType.KEEP, 330 TokenType.KILL, 331 TokenType.LEFT, 332 TokenType.LOAD, 333 TokenType.MERGE, 334 TokenType.NATURAL, 335 
TokenType.NEXT, 336 TokenType.OFFSET, 337 TokenType.OPERATOR, 338 TokenType.ORDINALITY, 339 TokenType.OVERLAPS, 340 TokenType.OVERWRITE, 341 TokenType.PARTITION, 342 TokenType.PERCENT, 343 TokenType.PIVOT, 344 TokenType.PRAGMA, 345 TokenType.RANGE, 346 TokenType.RECURSIVE, 347 TokenType.REFERENCES, 348 TokenType.REFRESH, 349 TokenType.REPLACE, 350 TokenType.RIGHT, 351 TokenType.ROW, 352 TokenType.ROWS, 353 TokenType.SEMI, 354 TokenType.SET, 355 TokenType.SETTINGS, 356 TokenType.SHOW, 357 TokenType.TEMPORARY, 358 TokenType.TOP, 359 TokenType.TRUE, 360 TokenType.UNIQUE, 361 TokenType.UNPIVOT, 362 TokenType.UPDATE, 363 TokenType.USE, 364 TokenType.VOLATILE, 365 TokenType.WINDOW, 366 *CREATABLES, 367 *SUBQUERY_PREDICATES, 368 *TYPE_TOKENS, 369 *NO_PAREN_FUNCTIONS, 370 } 371 372 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 373 374 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 375 TokenType.ANTI, 376 TokenType.APPLY, 377 TokenType.ASOF, 378 TokenType.FULL, 379 TokenType.LEFT, 380 TokenType.LOCK, 381 TokenType.NATURAL, 382 TokenType.OFFSET, 383 TokenType.RIGHT, 384 TokenType.SEMI, 385 TokenType.WINDOW, 386 } 387 388 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 389 390 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 391 392 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 393 394 FUNC_TOKENS = { 395 TokenType.COLLATE, 396 TokenType.COMMAND, 397 TokenType.CURRENT_DATE, 398 TokenType.CURRENT_DATETIME, 399 TokenType.CURRENT_TIMESTAMP, 400 TokenType.CURRENT_TIME, 401 TokenType.CURRENT_USER, 402 TokenType.FILTER, 403 TokenType.FIRST, 404 TokenType.FORMAT, 405 TokenType.GLOB, 406 TokenType.IDENTIFIER, 407 TokenType.INDEX, 408 TokenType.ISNULL, 409 TokenType.ILIKE, 410 TokenType.INSERT, 411 TokenType.LIKE, 412 TokenType.MERGE, 413 TokenType.OFFSET, 414 TokenType.PRIMARY_KEY, 415 TokenType.RANGE, 416 TokenType.REPLACE, 417 TokenType.RLIKE, 418 TokenType.ROW, 419 TokenType.UNNEST, 420 TokenType.VAR, 421 TokenType.LEFT, 422 TokenType.RIGHT, 423 TokenType.DATE, 
424 TokenType.DATETIME, 425 TokenType.TABLE, 426 TokenType.TIMESTAMP, 427 TokenType.TIMESTAMPTZ, 428 TokenType.WINDOW, 429 TokenType.XOR, 430 *TYPE_TOKENS, 431 *SUBQUERY_PREDICATES, 432 } 433 434 CONJUNCTION = { 435 TokenType.AND: exp.And, 436 TokenType.OR: exp.Or, 437 } 438 439 EQUALITY = { 440 TokenType.COLON_EQ: exp.PropertyEQ, 441 TokenType.EQ: exp.EQ, 442 TokenType.NEQ: exp.NEQ, 443 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 444 } 445 446 COMPARISON = { 447 TokenType.GT: exp.GT, 448 TokenType.GTE: exp.GTE, 449 TokenType.LT: exp.LT, 450 TokenType.LTE: exp.LTE, 451 } 452 453 BITWISE = { 454 TokenType.AMP: exp.BitwiseAnd, 455 TokenType.CARET: exp.BitwiseXor, 456 TokenType.PIPE: exp.BitwiseOr, 457 } 458 459 TERM = { 460 TokenType.DASH: exp.Sub, 461 TokenType.PLUS: exp.Add, 462 TokenType.MOD: exp.Mod, 463 TokenType.COLLATE: exp.Collate, 464 } 465 466 FACTOR = { 467 TokenType.DIV: exp.IntDiv, 468 TokenType.LR_ARROW: exp.Distance, 469 TokenType.SLASH: exp.Div, 470 TokenType.STAR: exp.Mul, 471 } 472 473 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 474 475 TIMES = { 476 TokenType.TIME, 477 TokenType.TIMETZ, 478 } 479 480 TIMESTAMPS = { 481 TokenType.TIMESTAMP, 482 TokenType.TIMESTAMPTZ, 483 TokenType.TIMESTAMPLTZ, 484 *TIMES, 485 } 486 487 SET_OPERATIONS = { 488 TokenType.UNION, 489 TokenType.INTERSECT, 490 TokenType.EXCEPT, 491 } 492 493 JOIN_METHODS = { 494 TokenType.NATURAL, 495 TokenType.ASOF, 496 } 497 498 JOIN_SIDES = { 499 TokenType.LEFT, 500 TokenType.RIGHT, 501 TokenType.FULL, 502 } 503 504 JOIN_KINDS = { 505 TokenType.INNER, 506 TokenType.OUTER, 507 TokenType.CROSS, 508 TokenType.SEMI, 509 TokenType.ANTI, 510 } 511 512 JOIN_HINTS: t.Set[str] = set() 513 514 LAMBDAS = { 515 TokenType.ARROW: lambda self, expressions: self.expression( 516 exp.Lambda, 517 this=self._replace_lambda( 518 self._parse_conjunction(), 519 {node.name for node in expressions}, 520 ), 521 expressions=expressions, 522 ), 523 TokenType.FARROW: lambda self, expressions: 
self.expression( 524 exp.Kwarg, 525 this=exp.var(expressions[0].name), 526 expression=self._parse_conjunction(), 527 ), 528 } 529 530 COLUMN_OPERATORS = { 531 TokenType.DOT: None, 532 TokenType.DCOLON: lambda self, this, to: self.expression( 533 exp.Cast if self.STRICT_CAST else exp.TryCast, 534 this=this, 535 to=to, 536 ), 537 TokenType.ARROW: lambda self, this, path: self.expression( 538 exp.JSONExtract, 539 this=this, 540 expression=path, 541 ), 542 TokenType.DARROW: lambda self, this, path: self.expression( 543 exp.JSONExtractScalar, 544 this=this, 545 expression=path, 546 ), 547 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 548 exp.JSONBExtract, 549 this=this, 550 expression=path, 551 ), 552 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 553 exp.JSONBExtractScalar, 554 this=this, 555 expression=path, 556 ), 557 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 558 exp.JSONBContains, 559 this=this, 560 expression=key, 561 ), 562 } 563 564 EXPRESSION_PARSERS = { 565 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 566 exp.Column: lambda self: self._parse_column(), 567 exp.Condition: lambda self: self._parse_conjunction(), 568 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 569 exp.Expression: lambda self: self._parse_statement(), 570 exp.From: lambda self: self._parse_from(), 571 exp.Group: lambda self: self._parse_group(), 572 exp.Having: lambda self: self._parse_having(), 573 exp.Identifier: lambda self: self._parse_id_var(), 574 exp.Join: lambda self: self._parse_join(), 575 exp.Lambda: lambda self: self._parse_lambda(), 576 exp.Lateral: lambda self: self._parse_lateral(), 577 exp.Limit: lambda self: self._parse_limit(), 578 exp.Offset: lambda self: self._parse_offset(), 579 exp.Order: lambda self: self._parse_order(), 580 exp.Ordered: lambda self: self._parse_ordered(), 581 exp.Properties: lambda self: self._parse_properties(), 582 exp.Qualify: lambda self: 
self._parse_qualify(), 583 exp.Returning: lambda self: self._parse_returning(), 584 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 585 exp.Table: lambda self: self._parse_table_parts(), 586 exp.TableAlias: lambda self: self._parse_table_alias(), 587 exp.Where: lambda self: self._parse_where(), 588 exp.Window: lambda self: self._parse_named_window(), 589 exp.With: lambda self: self._parse_with(), 590 "JOIN_TYPE": lambda self: self._parse_join_parts(), 591 } 592 593 STATEMENT_PARSERS = { 594 TokenType.ALTER: lambda self: self._parse_alter(), 595 TokenType.BEGIN: lambda self: self._parse_transaction(), 596 TokenType.CACHE: lambda self: self._parse_cache(), 597 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 598 TokenType.COMMENT: lambda self: self._parse_comment(), 599 TokenType.CREATE: lambda self: self._parse_create(), 600 TokenType.DELETE: lambda self: self._parse_delete(), 601 TokenType.DESC: lambda self: self._parse_describe(), 602 TokenType.DESCRIBE: lambda self: self._parse_describe(), 603 TokenType.DROP: lambda self: self._parse_drop(), 604 TokenType.INSERT: lambda self: self._parse_insert(), 605 TokenType.KILL: lambda self: self._parse_kill(), 606 TokenType.LOAD: lambda self: self._parse_load(), 607 TokenType.MERGE: lambda self: self._parse_merge(), 608 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 609 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 610 TokenType.REFRESH: lambda self: self._parse_refresh(), 611 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 612 TokenType.SET: lambda self: self._parse_set(), 613 TokenType.UNCACHE: lambda self: self._parse_uncache(), 614 TokenType.UPDATE: lambda self: self._parse_update(), 615 TokenType.USE: lambda self: self.expression( 616 exp.Use, 617 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 618 and exp.var(self._prev.text), 619 this=self._parse_table(schema=False), 620 ), 621 } 

    # Prefix (unary) operator token → parser callback.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token → callback building the corresponding leaf node.
    # Each callback receives the parser and the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Placeholder token (?, @param, :name / :1, ...) → parser callback.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # `:` only forms a placeholder when followed by a number or identifier
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/comparison-like operator token → callback combining the left operand
    # (`this`) with whatever the callback parses to its right.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property keyword (upper-cased text, see `_parse_property`) → parser
    # callback. Keys with spaces match multi-word keyword sequences.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

CONSTRAINT_PARSERS = { 766 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 767 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 768 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 769 "CHARACTER SET": lambda self: self.expression( 770 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 771 ), 772 "CHECK": lambda self: self.expression( 773 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 774 ), 775 "COLLATE": lambda self: self.expression( 776 exp.CollateColumnConstraint, this=self._parse_var() 777 ), 778 "COMMENT": lambda self: self.expression( 779 exp.CommentColumnConstraint, this=self._parse_string() 780 ), 781 "COMPRESS": lambda self: self._parse_compress(), 782 "CLUSTERED": lambda self: self.expression( 783 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 784 ), 785 "NONCLUSTERED": lambda self: self.expression( 786 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 787 ), 788 "DEFAULT": lambda self: self.expression( 789 exp.DefaultColumnConstraint, this=self._parse_bitwise() 790 ), 791 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 792 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 793 "FORMAT": lambda self: self.expression( 794 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 795 ), 796 "GENERATED": lambda self: self._parse_generated_as_identity(), 797 "IDENTITY": lambda self: self._parse_auto_increment(), 798 "INLINE": lambda self: self._parse_inline(), 799 "LIKE": lambda self: self._parse_create_like(), 800 "NOT": lambda self: self._parse_not_constraint(), 801 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 802 "ON": lambda self: ( 803 self._match(TokenType.UPDATE) 804 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 805 ) 806 or 
self.expression(exp.OnProperty, this=self._parse_id_var()), 807 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 808 "PERIOD": lambda self: self._parse_period_for_system_time(), 809 "PRIMARY KEY": lambda self: self._parse_primary_key(), 810 "REFERENCES": lambda self: self._parse_references(match=False), 811 "TITLE": lambda self: self.expression( 812 exp.TitleColumnConstraint, this=self._parse_var_or_string() 813 ), 814 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 815 "UNIQUE": lambda self: self._parse_unique(), 816 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 817 "WITH": lambda self: self.expression( 818 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 819 ), 820 } 821 822 ALTER_PARSERS = { 823 "ADD": lambda self: self._parse_alter_table_add(), 824 "ALTER": lambda self: self._parse_alter_table_alter(), 825 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 826 "DROP": lambda self: self._parse_alter_table_drop(), 827 "RENAME": lambda self: self._parse_alter_table_rename(), 828 } 829 830 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 831 832 NO_PAREN_FUNCTION_PARSERS = { 833 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 834 "CASE": lambda self: self._parse_case(), 835 "IF": lambda self: self._parse_if(), 836 "NEXT": lambda self: self._parse_next_value_for(), 837 } 838 839 INVALID_FUNC_NAME_TOKENS = { 840 TokenType.IDENTIFIER, 841 TokenType.STRING, 842 } 843 844 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 845 846 FUNCTION_PARSERS = { 847 "ANY_VALUE": lambda self: self._parse_any_value(), 848 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 849 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 850 "DECODE": lambda self: self._parse_decode(), 851 "EXTRACT": lambda self: self._parse_extract(), 852 
"JSON_OBJECT": lambda self: self._parse_json_object(), 853 "JSON_TABLE": lambda self: self._parse_json_table(), 854 "MATCH": lambda self: self._parse_match_against(), 855 "OPENJSON": lambda self: self._parse_open_json(), 856 "POSITION": lambda self: self._parse_position(), 857 "PREDICT": lambda self: self._parse_predict(), 858 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 859 "STRING_AGG": lambda self: self._parse_string_agg(), 860 "SUBSTRING": lambda self: self._parse_substring(), 861 "TRIM": lambda self: self._parse_trim(), 862 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 863 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 864 } 865 866 QUERY_MODIFIER_PARSERS = { 867 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 868 TokenType.WHERE: lambda self: ("where", self._parse_where()), 869 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 870 TokenType.HAVING: lambda self: ("having", self._parse_having()), 871 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 872 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 873 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 874 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 875 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 876 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 877 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 878 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 879 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 880 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 881 TokenType.CLUSTER_BY: lambda self: ( 882 "cluster", 883 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 884 ), 885 TokenType.DISTRIBUTE_BY: lambda self: ( 886 "distribute", 887 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 888 
), 889 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 890 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 891 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 892 } 893 894 SET_PARSERS = { 895 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 896 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 897 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 898 "TRANSACTION": lambda self: self._parse_set_transaction(), 899 } 900 901 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 902 903 TYPE_LITERAL_PARSERS = { 904 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 905 } 906 907 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 908 909 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 910 911 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 912 913 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 914 TRANSACTION_CHARACTERISTICS = { 915 "ISOLATION LEVEL REPEATABLE READ", 916 "ISOLATION LEVEL READ COMMITTED", 917 "ISOLATION LEVEL READ UNCOMMITTED", 918 "ISOLATION LEVEL SERIALIZABLE", 919 "READ WRITE", 920 "READ ONLY", 921 } 922 923 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 924 925 CLONE_KEYWORDS = {"CLONE", "COPY"} 926 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 927 928 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 929 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 930 931 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 932 933 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 934 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 935 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 936 937 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 938 939 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, 
TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 940 941 DISTINCT_TOKENS = {TokenType.DISTINCT} 942 943 NULL_TOKENS = {TokenType.NULL} 944 945 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 946 947 STRICT_CAST = True 948 949 # A NULL arg in CONCAT yields NULL by default 950 CONCAT_NULL_OUTPUTS_STRING = False 951 952 PREFIXED_PIVOT_COLUMNS = False 953 IDENTIFY_PIVOT_STRINGS = False 954 955 LOG_DEFAULTS_TO_LN = False 956 957 # Whether or not ADD is present for each column added by ALTER TABLE 958 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 959 960 # Whether or not the table sample clause expects CSV syntax 961 TABLESAMPLE_CSV = False 962 963 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 964 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 965 966 # Whether the TRIM function expects the characters to trim as its first argument 967 TRIM_PATTERN_FIRST = False 968 969 __slots__ = ( 970 "error_level", 971 "error_message_context", 972 "max_errors", 973 "dialect", 974 "sql", 975 "errors", 976 "_tokens", 977 "_index", 978 "_curr", 979 "_next", 980 "_prev", 981 "_prev_comments", 982 ) 983 984 # Autofilled 985 SHOW_TRIE: t.Dict = {} 986 SET_TRIE: t.Dict = {} 987 988 def __init__( 989 self, 990 error_level: t.Optional[ErrorLevel] = None, 991 error_message_context: int = 100, 992 max_errors: int = 3, 993 dialect: DialectType = None, 994 ): 995 from sqlglot.dialects import Dialect 996 997 self.error_level = error_level or ErrorLevel.IMMEDIATE 998 self.error_message_context = error_message_context 999 self.max_errors = max_errors 1000 self.dialect = Dialect.get_or_raise(dialect) 1001 self.reset() 1002 1003 def reset(self): 1004 self.sql = "" 1005 self.errors = [] 1006 self._tokens = [] 1007 self._index = 0 1008 self._curr = None 1009 self._next = None 1010 self._prev = None 1011 self._prev_comments = None 1012 1013 def parse( 1014 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1015 ) -> t.List[t.Optional[exp.Expression]]: 1016 
""" 1017 Parses a list of tokens and returns a list of syntax trees, one tree 1018 per parsed SQL statement. 1019 1020 Args: 1021 raw_tokens: The list of tokens. 1022 sql: The original SQL string, used to produce helpful debug messages. 1023 1024 Returns: 1025 The list of the produced syntax trees. 1026 """ 1027 return self._parse( 1028 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1029 ) 1030 1031 def parse_into( 1032 self, 1033 expression_types: exp.IntoType, 1034 raw_tokens: t.List[Token], 1035 sql: t.Optional[str] = None, 1036 ) -> t.List[t.Optional[exp.Expression]]: 1037 """ 1038 Parses a list of tokens into a given Expression type. If a collection of Expression 1039 types is given instead, this method will try to parse the token list into each one 1040 of them, stopping at the first for which the parsing succeeds. 1041 1042 Args: 1043 expression_types: The expression type(s) to try and parse the token list into. 1044 raw_tokens: The list of tokens. 1045 sql: The original SQL string, used to produce helpful debug messages. 1046 1047 Returns: 1048 The target Expression. 
1049 """ 1050 errors = [] 1051 for expression_type in ensure_list(expression_types): 1052 parser = self.EXPRESSION_PARSERS.get(expression_type) 1053 if not parser: 1054 raise TypeError(f"No parser registered for {expression_type}") 1055 1056 try: 1057 return self._parse(parser, raw_tokens, sql) 1058 except ParseError as e: 1059 e.errors[0]["into_expression"] = expression_type 1060 errors.append(e) 1061 1062 raise ParseError( 1063 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1064 errors=merge_errors(errors), 1065 ) from errors[-1] 1066 1067 def _parse( 1068 self, 1069 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1070 raw_tokens: t.List[Token], 1071 sql: t.Optional[str] = None, 1072 ) -> t.List[t.Optional[exp.Expression]]: 1073 self.reset() 1074 self.sql = sql or "" 1075 1076 total = len(raw_tokens) 1077 chunks: t.List[t.List[Token]] = [[]] 1078 1079 for i, token in enumerate(raw_tokens): 1080 if token.token_type == TokenType.SEMICOLON: 1081 if i < total - 1: 1082 chunks.append([]) 1083 else: 1084 chunks[-1].append(token) 1085 1086 expressions = [] 1087 1088 for tokens in chunks: 1089 self._index = -1 1090 self._tokens = tokens 1091 self._advance() 1092 1093 expressions.append(parse_method(self)) 1094 1095 if self._index < len(self._tokens): 1096 self.raise_error("Invalid expression / Unexpected token") 1097 1098 self.check_errors() 1099 1100 return expressions 1101 1102 def check_errors(self) -> None: 1103 """Logs or raises any found errors, depending on the chosen error level setting.""" 1104 if self.error_level == ErrorLevel.WARN: 1105 for error in self.errors: 1106 logger.error(str(error)) 1107 elif self.error_level == ErrorLevel.RAISE and self.errors: 1108 raise ParseError( 1109 concat_messages(self.errors, self.max_errors), 1110 errors=merge_errors(self.errors), 1111 ) 1112 1113 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1114 """ 1115 Appends an error in the list of recorded errors or 
raises it, depending on the chosen 1116 error level setting. 1117 """ 1118 token = token or self._curr or self._prev or Token.string("") 1119 start = token.start 1120 end = token.end + 1 1121 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1122 highlight = self.sql[start:end] 1123 end_context = self.sql[end : end + self.error_message_context] 1124 1125 error = ParseError.new( 1126 f"{message}. Line {token.line}, Col: {token.col}.\n" 1127 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1128 description=message, 1129 line=token.line, 1130 col=token.col, 1131 start_context=start_context, 1132 highlight=highlight, 1133 end_context=end_context, 1134 ) 1135 1136 if self.error_level == ErrorLevel.IMMEDIATE: 1137 raise error 1138 1139 self.errors.append(error) 1140 1141 def expression( 1142 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1143 ) -> E: 1144 """ 1145 Creates a new, validated Expression. 1146 1147 Args: 1148 exp_class: The expression class to instantiate. 1149 comments: An optional list of comments to attach to the expression. 1150 kwargs: The arguments to set for the expression along with their respective values. 1151 1152 Returns: 1153 The target expression. 1154 """ 1155 instance = exp_class(**kwargs) 1156 instance.add_comments(comments) if comments else self._add_comments(instance) 1157 return self.validate_expression(instance) 1158 1159 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1160 if expression and self._prev_comments: 1161 expression.add_comments(self._prev_comments) 1162 self._prev_comments = None 1163 1164 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1165 """ 1166 Validates an Expression, making sure that all its mandatory arguments are set. 1167 1168 Args: 1169 expression: The expression to validate. 1170 args: An optional list of items that was used to instantiate the expression, if it's a Func. 
1171 1172 Returns: 1173 The validated expression. 1174 """ 1175 if self.error_level != ErrorLevel.IGNORE: 1176 for error_message in expression.error_messages(args): 1177 self.raise_error(error_message) 1178 1179 return expression 1180 1181 def _find_sql(self, start: Token, end: Token) -> str: 1182 return self.sql[start.start : end.end + 1] 1183 1184 def _advance(self, times: int = 1) -> None: 1185 self._index += times 1186 self._curr = seq_get(self._tokens, self._index) 1187 self._next = seq_get(self._tokens, self._index + 1) 1188 1189 if self._index > 0: 1190 self._prev = self._tokens[self._index - 1] 1191 self._prev_comments = self._prev.comments 1192 else: 1193 self._prev = None 1194 self._prev_comments = None 1195 1196 def _retreat(self, index: int) -> None: 1197 if index != self._index: 1198 self._advance(index - self._index) 1199 1200 def _parse_command(self) -> exp.Command: 1201 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1202 1203 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1204 start = self._prev 1205 exists = self._parse_exists() if allow_exists else None 1206 1207 self._match(TokenType.ON) 1208 1209 kind = self._match_set(self.CREATABLES) and self._prev 1210 if not kind: 1211 return self._parse_as_command(start) 1212 1213 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1214 this = self._parse_user_defined_function(kind=kind.token_type) 1215 elif kind.token_type == TokenType.TABLE: 1216 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1217 elif kind.token_type == TokenType.COLUMN: 1218 this = self._parse_column() 1219 else: 1220 this = self._parse_id_var() 1221 1222 self._match(TokenType.IS) 1223 1224 return self.expression( 1225 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1226 ) 1227 1228 def _parse_to_table( 1229 self, 1230 ) -> exp.ToTableProperty: 1231 table = self._parse_table_parts(schema=True) 
1232 return self.expression(exp.ToTableProperty, this=table) 1233 1234 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1235 def _parse_ttl(self) -> exp.Expression: 1236 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1237 this = self._parse_bitwise() 1238 1239 if self._match_text_seq("DELETE"): 1240 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1241 if self._match_text_seq("RECOMPRESS"): 1242 return self.expression( 1243 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1244 ) 1245 if self._match_text_seq("TO", "DISK"): 1246 return self.expression( 1247 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1248 ) 1249 if self._match_text_seq("TO", "VOLUME"): 1250 return self.expression( 1251 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1252 ) 1253 1254 return this 1255 1256 expressions = self._parse_csv(_parse_ttl_action) 1257 where = self._parse_where() 1258 group = self._parse_group() 1259 1260 aggregates = None 1261 if group and self._match(TokenType.SET): 1262 aggregates = self._parse_csv(self._parse_set_item) 1263 1264 return self.expression( 1265 exp.MergeTreeTTL, 1266 expressions=expressions, 1267 where=where, 1268 group=group, 1269 aggregates=aggregates, 1270 ) 1271 1272 def _parse_statement(self) -> t.Optional[exp.Expression]: 1273 if self._curr is None: 1274 return None 1275 1276 if self._match_set(self.STATEMENT_PARSERS): 1277 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1278 1279 if self._match_set(Tokenizer.COMMANDS): 1280 return self._parse_command() 1281 1282 expression = self._parse_expression() 1283 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1284 return self._parse_query_modifiers(expression) 1285 1286 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1287 start = self._prev 1288 temporary = self._match(TokenType.TEMPORARY) 1289 
materialized = self._match_text_seq("MATERIALIZED") 1290 1291 kind = self._match_set(self.CREATABLES) and self._prev.text 1292 if not kind: 1293 return self._parse_as_command(start) 1294 1295 return self.expression( 1296 exp.Drop, 1297 comments=start.comments, 1298 exists=exists or self._parse_exists(), 1299 this=self._parse_table(schema=True), 1300 kind=kind, 1301 temporary=temporary, 1302 materialized=materialized, 1303 cascade=self._match_text_seq("CASCADE"), 1304 constraints=self._match_text_seq("CONSTRAINTS"), 1305 purge=self._match_text_seq("PURGE"), 1306 ) 1307 1308 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1309 return ( 1310 self._match_text_seq("IF") 1311 and (not not_ or self._match(TokenType.NOT)) 1312 and self._match(TokenType.EXISTS) 1313 ) 1314 1315 def _parse_create(self) -> exp.Create | exp.Command: 1316 # Note: this can't be None because we've matched a statement parser 1317 start = self._prev 1318 comments = self._prev_comments 1319 1320 replace = start.text.upper() == "REPLACE" or self._match_pair( 1321 TokenType.OR, TokenType.REPLACE 1322 ) 1323 unique = self._match(TokenType.UNIQUE) 1324 1325 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1326 self._advance() 1327 1328 properties = None 1329 create_token = self._match_set(self.CREATABLES) and self._prev 1330 1331 if not create_token: 1332 # exp.Properties.Location.POST_CREATE 1333 properties = self._parse_properties() 1334 create_token = self._match_set(self.CREATABLES) and self._prev 1335 1336 if not properties or not create_token: 1337 return self._parse_as_command(start) 1338 1339 exists = self._parse_exists(not_=True) 1340 this = None 1341 expression: t.Optional[exp.Expression] = None 1342 indexes = None 1343 no_schema_binding = None 1344 begin = None 1345 end = None 1346 clone = None 1347 1348 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1349 nonlocal properties 1350 if properties and temp_props: 1351 
properties.expressions.extend(temp_props.expressions) 1352 elif temp_props: 1353 properties = temp_props 1354 1355 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1356 this = self._parse_user_defined_function(kind=create_token.token_type) 1357 1358 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1359 extend_props(self._parse_properties()) 1360 1361 self._match(TokenType.ALIAS) 1362 1363 if self._match(TokenType.COMMAND): 1364 expression = self._parse_as_command(self._prev) 1365 else: 1366 begin = self._match(TokenType.BEGIN) 1367 return_ = self._match_text_seq("RETURN") 1368 1369 if self._match(TokenType.STRING, advance=False): 1370 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1371 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1372 expression = self._parse_string() 1373 extend_props(self._parse_properties()) 1374 else: 1375 expression = self._parse_statement() 1376 1377 end = self._match_text_seq("END") 1378 1379 if return_: 1380 expression = self.expression(exp.Return, this=expression) 1381 elif create_token.token_type == TokenType.INDEX: 1382 this = self._parse_index(index=self._parse_id_var()) 1383 elif create_token.token_type in self.DB_CREATABLES: 1384 table_parts = self._parse_table_parts(schema=True) 1385 1386 # exp.Properties.Location.POST_NAME 1387 self._match(TokenType.COMMA) 1388 extend_props(self._parse_properties(before=True)) 1389 1390 this = self._parse_schema(this=table_parts) 1391 1392 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1393 extend_props(self._parse_properties()) 1394 1395 self._match(TokenType.ALIAS) 1396 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1397 # exp.Properties.Location.POST_ALIAS 1398 extend_props(self._parse_properties()) 1399 1400 expression = self._parse_ddl_select() 1401 1402 if create_token.token_type == TokenType.TABLE: 1403 # 
exp.Properties.Location.POST_EXPRESSION 1404 extend_props(self._parse_properties()) 1405 1406 indexes = [] 1407 while True: 1408 index = self._parse_index() 1409 1410 # exp.Properties.Location.POST_INDEX 1411 extend_props(self._parse_properties()) 1412 1413 if not index: 1414 break 1415 else: 1416 self._match(TokenType.COMMA) 1417 indexes.append(index) 1418 elif create_token.token_type == TokenType.VIEW: 1419 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1420 no_schema_binding = True 1421 1422 shallow = self._match_text_seq("SHALLOW") 1423 1424 if self._match_texts(self.CLONE_KEYWORDS): 1425 copy = self._prev.text.lower() == "copy" 1426 clone = self._parse_table(schema=True) 1427 when = self._match_texts(("AT", "BEFORE")) and self._prev.text.upper() 1428 clone_kind = ( 1429 self._match(TokenType.L_PAREN) 1430 and self._match_texts(self.CLONE_KINDS) 1431 and self._prev.text.upper() 1432 ) 1433 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1434 self._match(TokenType.R_PAREN) 1435 clone = self.expression( 1436 exp.Clone, 1437 this=clone, 1438 when=when, 1439 kind=clone_kind, 1440 shallow=shallow, 1441 expression=clone_expression, 1442 copy=copy, 1443 ) 1444 1445 return self.expression( 1446 exp.Create, 1447 comments=comments, 1448 this=this, 1449 kind=create_token.text, 1450 replace=replace, 1451 unique=unique, 1452 expression=expression, 1453 exists=exists, 1454 properties=properties, 1455 indexes=indexes, 1456 no_schema_binding=no_schema_binding, 1457 begin=begin, 1458 end=end, 1459 clone=clone, 1460 ) 1461 1462 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1463 # only used for teradata currently 1464 self._match(TokenType.COMMA) 1465 1466 kwargs = { 1467 "no": self._match_text_seq("NO"), 1468 "dual": self._match_text_seq("DUAL"), 1469 "before": self._match_text_seq("BEFORE"), 1470 "default": self._match_text_seq("DEFAULT"), 1471 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1472 or 
(self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        # Dispatch on the property keyword; only truthy modifier flags are forwarded.
        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The chosen parser doesn't accept one of the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property: keyword-dispatched parsers first, then a generic `key = value` pair."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fallback: `key = value`. If there is no EQ after the column, this is not
        # a property at all, so rewind to where we started and bail out.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS ..., including the INPUTFORMAT/OUTPUTFORMAT pair form."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Optional `=` / `AS` between the property keyword and its value.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties; a single parse step may yield one property or a list."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: look two tokens back to decide whether it is a
        # table qualifier (VolatileProperty) or a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        # Optional parenthesized options: (HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse what follows a WITH keyword in a property list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # on stays None when neither ON nor OFF is present.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (orderings)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY not followed by GRANTS is not this property: give the COPY token back.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # NOTE(review): matches a LOCKING request modifier shape
        # (kind, optional target, FOR/IN, lock type, OVERRIDE) — presumably Teradata; confirm.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks do not name a target object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not column references.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION not followed by OF is some other construct: rewind one token.
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # statistics stays None when no AND [NO] STATISTICS suffix is present.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> with optional INCLUDING/EXCLUDING options."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: RETURNS TABLE<...>, RETURNS TABLE (schema), or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return \
self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT, including INSERT OVERWRITE [LOCAL] DIRECTORY and INSERT OR <alternative>."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # RETURNING may appear either before or after the source expression,
        # hence the second _parse_returning attempt in the kwargs below.
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... DO NOTHING/UPDATE SET ... or ON DUPLICATE KEY UPDATE ..."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # The conflict target is either a named constraint or a key/column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '...' [WITH SERDEPROPERTIES (...)] or ROW FORMAT DELIMITED ..."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        # Each DELIMITED sub-clause is optional and order-sensitive.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ...; anything else after LOAD falls back to an opaque command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear before or after the FROM/WHERE tail.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE t [OPTIONS('k' = 'v')] [AS select]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = \
self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statements, SELECT, parenthesized
        subqueries (when nested/table), VALUES, and DuckDB's leading FROM."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a stray repeated WITH is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        # Optional column list after the alias; rewind if the parens held nothing.
        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals, and clause modifiers (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        # A LIMIT clause may carry an embedded offset; hoist it.
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            # iter() stops when _parse_csv yields its [] sentinel (no more functions).
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is free-form regex-like syntax: consume raw tokens while
            # tracking paren depth, then recover the SQL text between start and end.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL ... or the OUTER/CROSS APPLY equivalents."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or \
cross_apply: 2513 this = self._parse_select(table=True) 2514 view = None 2515 outer = not cross_apply 2516 elif self._match(TokenType.LATERAL): 2517 this = self._parse_select(table=True) 2518 view = self._match(TokenType.VIEW) 2519 outer = self._match(TokenType.OUTER) 2520 else: 2521 return None 2522 2523 if not this: 2524 this = ( 2525 self._parse_unnest() 2526 or self._parse_function() 2527 or self._parse_id_var(any_token=False) 2528 ) 2529 2530 while self._match(TokenType.DOT): 2531 this = exp.Dot( 2532 this=this, 2533 expression=self._parse_function() or self._parse_id_var(any_token=False), 2534 ) 2535 2536 if view: 2537 table = self._parse_id_var(any_token=False) 2538 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2539 table_alias: t.Optional[exp.TableAlias] = self.expression( 2540 exp.TableAlias, this=table, columns=columns 2541 ) 2542 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2543 # We move the alias from the lateral's child node to the lateral itself 2544 table_alias = this.args["alias"].pop() 2545 else: 2546 table_alias = self._parse_table_alias() 2547 2548 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2549 2550 def _parse_join_parts( 2551 self, 2552 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2553 return ( 2554 self._match_set(self.JOIN_METHODS) and self._prev, 2555 self._match_set(self.JOIN_SIDES) and self._prev, 2556 self._match_set(self.JOIN_KINDS) and self._prev, 2557 ) 2558 2559 def _parse_join( 2560 self, skip_join_token: bool = False, parse_bracket: bool = False 2561 ) -> t.Optional[exp.Join]: 2562 if self._match(TokenType.COMMA): 2563 return self.expression(exp.Join, this=self._parse_table()) 2564 2565 index = self._index 2566 method, side, kind = self._parse_join_parts() 2567 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2568 join = self._match(TokenType.JOIN) 2569 2570 if not 
skip_join_token and not join: 2571 self._retreat(index) 2572 kind = None 2573 method = None 2574 side = None 2575 2576 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2577 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2578 2579 if not skip_join_token and not join and not outer_apply and not cross_apply: 2580 return None 2581 2582 if outer_apply: 2583 side = Token(TokenType.LEFT, "LEFT") 2584 2585 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2586 2587 if method: 2588 kwargs["method"] = method.text 2589 if side: 2590 kwargs["side"] = side.text 2591 if kind: 2592 kwargs["kind"] = kind.text 2593 if hint: 2594 kwargs["hint"] = hint 2595 2596 if self._match(TokenType.ON): 2597 kwargs["on"] = self._parse_conjunction() 2598 elif self._match(TokenType.USING): 2599 kwargs["using"] = self._parse_wrapped_id_vars() 2600 elif not (kind and kind.token_type == TokenType.CROSS): 2601 index = self._index 2602 join = self._parse_join() 2603 2604 if join and self._match(TokenType.ON): 2605 kwargs["on"] = self._parse_conjunction() 2606 elif join and self._match(TokenType.USING): 2607 kwargs["using"] = self._parse_wrapped_id_vars() 2608 else: 2609 join = None 2610 self._retreat(index) 2611 2612 kwargs["this"].set("joins", [join] if join else None) 2613 2614 comments = [c for token in (method, side, kind) if token for c in token.comments] 2615 return self.expression(exp.Join, comments=comments, **kwargs) 2616 2617 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2618 this = self._parse_conjunction() 2619 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2620 return this 2621 2622 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2623 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2624 2625 return this 2626 2627 def _parse_index( 2628 self, 2629 index: t.Optional[exp.Expression] = None, 2630 ) -> t.Optional[exp.Index]: 
2631 if index: 2632 unique = None 2633 primary = None 2634 amp = None 2635 2636 self._match(TokenType.ON) 2637 self._match(TokenType.TABLE) # hive 2638 table = self._parse_table_parts(schema=True) 2639 else: 2640 unique = self._match(TokenType.UNIQUE) 2641 primary = self._match_text_seq("PRIMARY") 2642 amp = self._match_text_seq("AMP") 2643 2644 if not self._match(TokenType.INDEX): 2645 return None 2646 2647 index = self._parse_id_var() 2648 table = None 2649 2650 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2651 2652 if self._match(TokenType.L_PAREN, advance=False): 2653 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2654 else: 2655 columns = None 2656 2657 return self.expression( 2658 exp.Index, 2659 this=index, 2660 table=table, 2661 using=using, 2662 columns=columns, 2663 unique=unique, 2664 primary=primary, 2665 amp=amp, 2666 partition_by=self._parse_partition_by(), 2667 where=self._parse_where(), 2668 ) 2669 2670 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2671 hints: t.List[exp.Expression] = [] 2672 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2673 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2674 hints.append( 2675 self.expression( 2676 exp.WithTableHint, 2677 expressions=self._parse_csv( 2678 lambda: self._parse_function() or self._parse_var(any_token=True) 2679 ), 2680 ) 2681 ) 2682 self._match_r_paren() 2683 else: 2684 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2685 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2686 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2687 2688 self._match_texts(("INDEX", "KEY")) 2689 if self._match(TokenType.FOR): 2690 hint.set("target", self._advance_any() and self._prev.text.upper()) 2691 2692 hint.set("expressions", self._parse_wrapped_id_vars()) 2693 hints.append(hint) 2694 2695 return hints or None 2696 2697 def 
_parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2698 return ( 2699 (not schema and self._parse_function(optional_parens=False)) 2700 or self._parse_id_var(any_token=False) 2701 or self._parse_string_as_identifier() 2702 or self._parse_placeholder() 2703 ) 2704 2705 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2706 catalog = None 2707 db = None 2708 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2709 2710 while self._match(TokenType.DOT): 2711 if catalog: 2712 # This allows nesting the table in arbitrarily many dot expressions if needed 2713 table = self.expression( 2714 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2715 ) 2716 else: 2717 catalog = db 2718 db = table 2719 table = self._parse_table_part(schema=schema) or "" 2720 2721 if not table: 2722 self.raise_error(f"Expected table name but got {self._curr}") 2723 2724 return self.expression( 2725 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2726 ) 2727 2728 def _parse_table( 2729 self, 2730 schema: bool = False, 2731 joins: bool = False, 2732 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2733 parse_bracket: bool = False, 2734 ) -> t.Optional[exp.Expression]: 2735 lateral = self._parse_lateral() 2736 if lateral: 2737 return lateral 2738 2739 unnest = self._parse_unnest() 2740 if unnest: 2741 return unnest 2742 2743 values = self._parse_derived_table_values() 2744 if values: 2745 return values 2746 2747 subquery = self._parse_select(table=True) 2748 if subquery: 2749 if not subquery.args.get("pivots"): 2750 subquery.set("pivots", self._parse_pivots()) 2751 return subquery 2752 2753 bracket = parse_bracket and self._parse_bracket(None) 2754 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2755 this = t.cast( 2756 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2757 ) 2758 2759 if schema: 2760 return 
self._parse_schema(this=this) 2761 2762 version = self._parse_version() 2763 2764 if version: 2765 this.set("version", version) 2766 2767 if self.dialect.ALIAS_POST_TABLESAMPLE: 2768 table_sample = self._parse_table_sample() 2769 2770 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2771 if alias: 2772 this.set("alias", alias) 2773 2774 if self._match_text_seq("AT"): 2775 this.set("index", self._parse_id_var()) 2776 2777 this.set("hints", self._parse_table_hints()) 2778 2779 if not this.args.get("pivots"): 2780 this.set("pivots", self._parse_pivots()) 2781 2782 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2783 table_sample = self._parse_table_sample() 2784 2785 if table_sample: 2786 table_sample.set("this", this) 2787 this = table_sample 2788 2789 if joins: 2790 for join in iter(self._parse_join, None): 2791 this.append("joins", join) 2792 2793 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2794 this.set("ordinality", True) 2795 this.set("alias", self._parse_table_alias()) 2796 2797 return this 2798 2799 def _parse_version(self) -> t.Optional[exp.Version]: 2800 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2801 this = "TIMESTAMP" 2802 elif self._match(TokenType.VERSION_SNAPSHOT): 2803 this = "VERSION" 2804 else: 2805 return None 2806 2807 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2808 kind = self._prev.text.upper() 2809 start = self._parse_bitwise() 2810 self._match_texts(("TO", "AND")) 2811 end = self._parse_bitwise() 2812 expression: t.Optional[exp.Expression] = self.expression( 2813 exp.Tuple, expressions=[start, end] 2814 ) 2815 elif self._match_text_seq("CONTAINED", "IN"): 2816 kind = "CONTAINED IN" 2817 expression = self.expression( 2818 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2819 ) 2820 elif self._match(TokenType.ALL): 2821 kind = "ALL" 2822 expression = None 2823 else: 2824 self._match_text_seq("AS", "OF") 2825 kind = "AS OF" 2826 expression = self._parse_type() 2827 
2828 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2829 2830 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2831 if not self._match(TokenType.UNNEST): 2832 return None 2833 2834 expressions = self._parse_wrapped_csv(self._parse_equality) 2835 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2836 2837 alias = self._parse_table_alias() if with_alias else None 2838 2839 if alias: 2840 if self.dialect.UNNEST_COLUMN_ONLY: 2841 if alias.args.get("columns"): 2842 self.raise_error("Unexpected extra column alias in unnest.") 2843 2844 alias.set("columns", [alias.this]) 2845 alias.set("this", None) 2846 2847 columns = alias.args.get("columns") or [] 2848 if offset and len(expressions) < len(columns): 2849 offset = columns.pop() 2850 2851 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2852 self._match(TokenType.ALIAS) 2853 offset = self._parse_id_var( 2854 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2855 ) or exp.to_identifier("offset") 2856 2857 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2858 2859 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2860 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2861 if not is_derived and not self._match(TokenType.VALUES): 2862 return None 2863 2864 expressions = self._parse_csv(self._parse_value) 2865 alias = self._parse_table_alias() 2866 2867 if is_derived: 2868 self._match_r_paren() 2869 2870 return self.expression( 2871 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2872 ) 2873 2874 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2875 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2876 as_modifier and self._match_text_seq("USING", "SAMPLE") 2877 ): 2878 return None 2879 2880 bucket_numerator = None 2881 bucket_denominator = None 2882 bucket_field = None 2883 percent = 
None 2884 rows = None 2885 size = None 2886 seed = None 2887 2888 kind = ( 2889 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2890 ) 2891 method = self._parse_var(tokens=(TokenType.ROW,)) 2892 2893 matched_l_paren = self._match(TokenType.L_PAREN) 2894 2895 if self.TABLESAMPLE_CSV: 2896 num = None 2897 expressions = self._parse_csv(self._parse_primary) 2898 else: 2899 expressions = None 2900 num = ( 2901 self._parse_factor() 2902 if self._match(TokenType.NUMBER, advance=False) 2903 else self._parse_primary() or self._parse_placeholder() 2904 ) 2905 2906 if self._match_text_seq("BUCKET"): 2907 bucket_numerator = self._parse_number() 2908 self._match_text_seq("OUT", "OF") 2909 bucket_denominator = bucket_denominator = self._parse_number() 2910 self._match(TokenType.ON) 2911 bucket_field = self._parse_field() 2912 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2913 percent = num 2914 elif self._match(TokenType.ROWS): 2915 rows = num 2916 elif num: 2917 size = num 2918 2919 if matched_l_paren: 2920 self._match_r_paren() 2921 2922 if self._match(TokenType.L_PAREN): 2923 method = self._parse_var() 2924 seed = self._match(TokenType.COMMA) and self._parse_number() 2925 self._match_r_paren() 2926 elif self._match_texts(("SEED", "REPEATABLE")): 2927 seed = self._parse_wrapped(self._parse_number) 2928 2929 return self.expression( 2930 exp.TableSample, 2931 expressions=expressions, 2932 method=method, 2933 bucket_numerator=bucket_numerator, 2934 bucket_denominator=bucket_denominator, 2935 bucket_field=bucket_field, 2936 percent=percent, 2937 rows=rows, 2938 size=size, 2939 seed=seed, 2940 kind=kind, 2941 ) 2942 2943 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2944 return list(iter(self._parse_pivot, None)) or None 2945 2946 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2947 return list(iter(self._parse_join, None)) or None 2948 2949 # https://duckdb.org/docs/sql/statements/pivot 2950 def 
_parse_simplified_pivot(self) -> exp.Pivot: 2951 def _parse_on() -> t.Optional[exp.Expression]: 2952 this = self._parse_bitwise() 2953 return self._parse_in(this) if self._match(TokenType.IN) else this 2954 2955 this = self._parse_table() 2956 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2957 using = self._match(TokenType.USING) and self._parse_csv( 2958 lambda: self._parse_alias(self._parse_function()) 2959 ) 2960 group = self._parse_group() 2961 return self.expression( 2962 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2963 ) 2964 2965 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2966 index = self._index 2967 include_nulls = None 2968 2969 if self._match(TokenType.PIVOT): 2970 unpivot = False 2971 elif self._match(TokenType.UNPIVOT): 2972 unpivot = True 2973 2974 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2975 if self._match_text_seq("INCLUDE", "NULLS"): 2976 include_nulls = True 2977 elif self._match_text_seq("EXCLUDE", "NULLS"): 2978 include_nulls = False 2979 else: 2980 return None 2981 2982 expressions = [] 2983 field = None 2984 2985 if not self._match(TokenType.L_PAREN): 2986 self._retreat(index) 2987 return None 2988 2989 if unpivot: 2990 expressions = self._parse_csv(self._parse_column) 2991 else: 2992 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2993 2994 if not expressions: 2995 self.raise_error("Failed to parse PIVOT's aggregation list") 2996 2997 if not self._match(TokenType.FOR): 2998 self.raise_error("Expecting FOR") 2999 3000 value = self._parse_column() 3001 3002 if not self._match(TokenType.IN): 3003 self.raise_error("Expecting IN") 3004 3005 field = self._parse_in(value, alias=True) 3006 3007 self._match_r_paren() 3008 3009 pivot = self.expression( 3010 exp.Pivot, 3011 expressions=expressions, 3012 field=field, 3013 unpivot=unpivot, 3014 include_nulls=include_nulls, 3015 ) 3016 3017 if not 
self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3018 pivot.set("alias", self._parse_table_alias()) 3019 3020 if not unpivot: 3021 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3022 3023 columns: t.List[exp.Expression] = [] 3024 for fld in pivot.args["field"].expressions: 3025 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3026 for name in names: 3027 if self.PREFIXED_PIVOT_COLUMNS: 3028 name = f"{name}_{field_name}" if name else field_name 3029 else: 3030 name = f"{field_name}_{name}" if name else field_name 3031 3032 columns.append(exp.to_identifier(name)) 3033 3034 pivot.set("columns", columns) 3035 3036 return pivot 3037 3038 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3039 return [agg.alias for agg in aggregations] 3040 3041 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3042 if not skip_where_token and not self._match(TokenType.WHERE): 3043 return None 3044 3045 return self.expression( 3046 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3047 ) 3048 3049 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3050 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3051 return None 3052 3053 elements = defaultdict(list) 3054 3055 if self._match(TokenType.ALL): 3056 return self.expression(exp.Group, all=True) 3057 3058 while True: 3059 expressions = self._parse_csv(self._parse_conjunction) 3060 if expressions: 3061 elements["expressions"].extend(expressions) 3062 3063 grouping_sets = self._parse_grouping_sets() 3064 if grouping_sets: 3065 elements["grouping_sets"].extend(grouping_sets) 3066 3067 rollup = None 3068 cube = None 3069 totals = None 3070 3071 index = self._index 3072 with_ = self._match(TokenType.WITH) 3073 if self._match(TokenType.ROLLUP): 3074 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3075 
elements["rollup"].extend(ensure_list(rollup)) 3076 3077 if self._match(TokenType.CUBE): 3078 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3079 elements["cube"].extend(ensure_list(cube)) 3080 3081 if self._match_text_seq("TOTALS"): 3082 totals = True 3083 elements["totals"] = True # type: ignore 3084 3085 if not (grouping_sets or rollup or cube or totals): 3086 if with_: 3087 self._retreat(index) 3088 break 3089 3090 return self.expression(exp.Group, **elements) # type: ignore 3091 3092 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3093 if not self._match(TokenType.GROUPING_SETS): 3094 return None 3095 3096 return self._parse_wrapped_csv(self._parse_grouping_set) 3097 3098 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3099 if self._match(TokenType.L_PAREN): 3100 grouping_set = self._parse_csv(self._parse_column) 3101 self._match_r_paren() 3102 return self.expression(exp.Tuple, expressions=grouping_set) 3103 3104 return self._parse_column() 3105 3106 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3107 if not skip_having_token and not self._match(TokenType.HAVING): 3108 return None 3109 return self.expression(exp.Having, this=self._parse_conjunction()) 3110 3111 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3112 if not self._match(TokenType.QUALIFY): 3113 return None 3114 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3115 3116 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3117 if skip_start_token: 3118 start = None 3119 elif self._match(TokenType.START_WITH): 3120 start = self._parse_conjunction() 3121 else: 3122 return None 3123 3124 self._match(TokenType.CONNECT_BY) 3125 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3126 exp.Prior, this=self._parse_bitwise() 3127 ) 3128 connect = self._parse_conjunction() 3129 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3130 3131 if not start and 
self._match(TokenType.START_WITH): 3132 start = self._parse_conjunction() 3133 3134 return self.expression(exp.Connect, start=start, connect=connect) 3135 3136 def _parse_order( 3137 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3138 ) -> t.Optional[exp.Expression]: 3139 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3140 return this 3141 3142 return self.expression( 3143 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3144 ) 3145 3146 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3147 if not self._match(token): 3148 return None 3149 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3150 3151 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3152 this = parse_method() if parse_method else self._parse_conjunction() 3153 3154 asc = self._match(TokenType.ASC) 3155 desc = self._match(TokenType.DESC) or (asc and False) 3156 3157 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3158 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3159 3160 nulls_first = is_nulls_first or False 3161 explicitly_null_ordered = is_nulls_first or is_nulls_last 3162 3163 if ( 3164 not explicitly_null_ordered 3165 and ( 3166 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3167 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3168 ) 3169 and self.dialect.NULL_ORDERING != "nulls_are_last" 3170 ): 3171 nulls_first = True 3172 3173 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3174 3175 def _parse_limit( 3176 self, this: t.Optional[exp.Expression] = None, top: bool = False 3177 ) -> t.Optional[exp.Expression]: 3178 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3179 comments = self._prev_comments 3180 if top: 3181 limit_paren = self._match(TokenType.L_PAREN) 3182 expression = self._parse_term() if limit_paren else self._parse_number() 
3183 3184 if limit_paren: 3185 self._match_r_paren() 3186 else: 3187 expression = self._parse_term() 3188 3189 if self._match(TokenType.COMMA): 3190 offset = expression 3191 expression = self._parse_term() 3192 else: 3193 offset = None 3194 3195 limit_exp = self.expression( 3196 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3197 ) 3198 3199 return limit_exp 3200 3201 if self._match(TokenType.FETCH): 3202 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3203 direction = self._prev.text if direction else "FIRST" 3204 3205 count = self._parse_field(tokens=self.FETCH_TOKENS) 3206 percent = self._match(TokenType.PERCENT) 3207 3208 self._match_set((TokenType.ROW, TokenType.ROWS)) 3209 3210 only = self._match_text_seq("ONLY") 3211 with_ties = self._match_text_seq("WITH", "TIES") 3212 3213 if only and with_ties: 3214 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3215 3216 return self.expression( 3217 exp.Fetch, 3218 direction=direction, 3219 count=count, 3220 percent=percent, 3221 with_ties=with_ties, 3222 ) 3223 3224 return this 3225 3226 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3227 if not self._match(TokenType.OFFSET): 3228 return this 3229 3230 count = self._parse_term() 3231 self._match_set((TokenType.ROW, TokenType.ROWS)) 3232 return self.expression(exp.Offset, this=this, expression=count) 3233 3234 def _parse_locks(self) -> t.List[exp.Lock]: 3235 locks = [] 3236 while True: 3237 if self._match_text_seq("FOR", "UPDATE"): 3238 update = True 3239 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3240 "LOCK", "IN", "SHARE", "MODE" 3241 ): 3242 update = False 3243 else: 3244 break 3245 3246 expressions = None 3247 if self._match_text_seq("OF"): 3248 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3249 3250 wait: t.Optional[bool | exp.Expression] = None 3251 if self._match_text_seq("NOWAIT"): 3252 wait = 
True 3253 elif self._match_text_seq("WAIT"): 3254 wait = self._parse_primary() 3255 elif self._match_text_seq("SKIP", "LOCKED"): 3256 wait = False 3257 3258 locks.append( 3259 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3260 ) 3261 3262 return locks 3263 3264 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3265 if not self._match_set(self.SET_OPERATIONS): 3266 return this 3267 3268 token_type = self._prev.token_type 3269 3270 if token_type == TokenType.UNION: 3271 expression = exp.Union 3272 elif token_type == TokenType.EXCEPT: 3273 expression = exp.Except 3274 else: 3275 expression = exp.Intersect 3276 3277 return self.expression( 3278 expression, 3279 comments=self._prev.comments, 3280 this=this, 3281 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3282 by_name=self._match_text_seq("BY", "NAME"), 3283 expression=self._parse_set_operations(self._parse_select(nested=True)), 3284 ) 3285 3286 def _parse_expression(self) -> t.Optional[exp.Expression]: 3287 return self._parse_alias(self._parse_conjunction()) 3288 3289 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3290 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3291 3292 def _parse_equality(self) -> t.Optional[exp.Expression]: 3293 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3294 3295 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3296 return self._parse_tokens(self._parse_range, self.COMPARISON) 3297 3298 def _parse_range(self) -> t.Optional[exp.Expression]: 3299 this = self._parse_bitwise() 3300 negate = self._match(TokenType.NOT) 3301 3302 if self._match_set(self.RANGE_PARSERS): 3303 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3304 if not expression: 3305 return this 3306 3307 this = expression 3308 elif self._match(TokenType.ISNULL): 3309 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3310 3311 # 
Postgres supports ISNULL and NOTNULL for conditions. 3312 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3313 if self._match(TokenType.NOTNULL): 3314 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3315 this = self.expression(exp.Not, this=this) 3316 3317 if negate: 3318 this = self.expression(exp.Not, this=this) 3319 3320 if self._match(TokenType.IS): 3321 this = self._parse_is(this) 3322 3323 return this 3324 3325 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3326 index = self._index - 1 3327 negate = self._match(TokenType.NOT) 3328 3329 if self._match_text_seq("DISTINCT", "FROM"): 3330 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3331 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3332 3333 expression = self._parse_null() or self._parse_boolean() 3334 if not expression: 3335 self._retreat(index) 3336 return None 3337 3338 this = self.expression(exp.Is, this=this, expression=expression) 3339 return self.expression(exp.Not, this=this) if negate else this 3340 3341 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3342 unnest = self._parse_unnest(with_alias=False) 3343 if unnest: 3344 this = self.expression(exp.In, this=this, unnest=unnest) 3345 elif self._match(TokenType.L_PAREN): 3346 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3347 3348 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3349 this = self.expression(exp.In, this=this, query=expressions[0]) 3350 else: 3351 this = self.expression(exp.In, this=this, expressions=expressions) 3352 3353 self._match_r_paren(this) 3354 else: 3355 this = self.expression(exp.In, this=this, field=self._parse_field()) 3356 3357 return this 3358 3359 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3360 low = self._parse_bitwise() 3361 self._match(TokenType.AND) 3362 high = 
self._parse_bitwise() 3363 return self.expression(exp.Between, this=this, low=low, high=high) 3364 3365 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3366 if not self._match(TokenType.ESCAPE): 3367 return this 3368 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3369 3370 def _parse_interval(self) -> t.Optional[exp.Interval]: 3371 index = self._index 3372 3373 if not self._match(TokenType.INTERVAL): 3374 return None 3375 3376 if self._match(TokenType.STRING, advance=False): 3377 this = self._parse_primary() 3378 else: 3379 this = self._parse_term() 3380 3381 if not this: 3382 self._retreat(index) 3383 return None 3384 3385 unit = self._parse_function() or self._parse_var(any_token=True) 3386 3387 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3388 # each INTERVAL expression into this canonical form so it's easy to transpile 3389 if this and this.is_number: 3390 this = exp.Literal.string(this.name) 3391 elif this and this.is_string: 3392 parts = this.name.split() 3393 3394 if len(parts) == 2: 3395 if unit: 3396 # This is not actually a unit, it's something else (e.g. 
# (fragment of _parse_interval, continued from previous chunk) e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-precedence operators (and dialect-specific ||, ??, <<, >>) left-associatively."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                # || is string concatenation in this dialect (not logical OR / bitwise)
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # a ?? b  ->  COALESCE(a, b)
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-precedence (TERM) operators over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-precedence (FACTOR) operators, tagging Div with dialect semantics."""
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            # Record how this dialect treats division so the generator can transpile it
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators (only used when the dialect defines EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary prefix operator, or fall through to a (possibly AT TIME ZONE'd) typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a "TYPE 'literal'" cast shorthand, or a plain column expression.

        Backtracks (via _retreat) when a candidate data type turns out to
        precede something that isn't a literal/column it can apply to.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect hooks may build a specific node
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: treat it as a column instead
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parenthesized type parameter, e.g. the 10 in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>/STRUCT<...>), enum, interval,
        timestamp-with-time-zone variants, user-defined types and [] array suffixes.

        Args:
            check_func: when the type could also be a function call (e.g. DATE(...)),
                peek ahead and bail out (returning None) unless a string literal follows.
            schema: propagated to nested type parsing (column-def context).
            allow_identifiers: allow resolving a plain identifier to a type by re-tokenizing it.

        Returns None (after retreating) when no type can be parsed.
        """
        index = self._index

        # Teradata-style "SYSUDTLIB." prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text: a quoted identifier may still name a type
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    # Pretend we matched the type token so the logic below applies
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Dotted UDT names, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameter list — this wasn't a type after all
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call, checked below when check_func is set
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax: ARRAY<INT>, STRUCT<a INT, ...>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL YEAR TO MONTH style span
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal — treat the whole thing as a function call, not a type
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Postfix [] array suffixes, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: an optional type/identifier, an optional colon, then a column def."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows; otherwise return it unchanged."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (promoting a bare identifier to Column) plus any trailing operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse postfix column operators: brackets, :: casts, dots and dialect COLUMN_OPERATORS."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # x::TYPE cast — the "field" is a data type here
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dot after a column shifts its parts up one level: col -> table, table -> db, db -> catalog
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, .N number shorthand, or a parenthesized
        expression/tuple/subquery. Returns None if none of those match."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: first try a primary, then a function call, then an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, unwrapping the ODBC-style {fn <function>} escape if present."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call proper.

        Dispatch order: no-paren special parsers, no-paren builtin functions,
        FUNCTION_PARSERS hooks, subquery predicates (e.g. EXISTS(SELECT ...)),
        then known functions from `functions`/self.FUNCTIONS, falling back to
        exp.Anonymous. Finally parses a trailing window/OVER clause.

        Args:
            functions: name -> builder overrides (defaults to self.FUNCTIONS).
            anonymous: force an exp.Anonymous node, skipping known-function builders.
            optional_parens: allow function keywords with no parentheses.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Skip the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Some builders are dialect-aware; inspect the signature to decide
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be regenerated verbatim
                    func.meta["name"] = this

                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a user-defined function signature (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression ((x, y) -> ...), retreating to parse a regular
        (possibly DISTINCT) expression with optional ORDER BY / LIMIT if no lambda arrow follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as an ordinary argument expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        First speculatively tries a nested SELECT; if that parses, the caller's
        construct isn't a schema, so the cursor is rewound and `this` returned.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse must not leak errors or consume tokens
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a column definition whose name may be any token."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints following a column name, building a ColumnDef.

        Returns `this` unchanged when neither a type nor any constraint follows.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL]
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed transform column: <name> <type> AS (<expr>)
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with a single value or a parenthesized list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not an identity spec
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numbers inside the parens: (start [, increment])
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr> (inline length column constraint)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT ... column constraint (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named via CONSTRAINT <name>) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint, or an unnamed schema-level constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to a CONSTRAINT_PARSERS entry by keyword; None if nothing matches."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # e.g. the DELETE/UPDATE in ON DELETE CASCADE
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [<options>] clause; `match=False` skips the keyword check."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col)."""
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint, or a table-level key with a column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list follows -> this is a column-level constraint
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} following `this`: an index/slice, array literal, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:2]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts by the dialect's index base
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Chained brackets, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a Slice if a colon follows; otherwise return it unchanged."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END, plus a trailing window."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: IF(cond, true, false) and IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; retreat if VALUE FOR is absent."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) — also accepting a comma as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING {MAX | MIN} <col>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST/TRY_CAST: <expr> AS <type> [FORMAT <fmt> [AT TIME ZONE ...]].

        A comma instead of AS yields CastToStrType; a FORMAT clause on a temporal
        target type is rewritten to StrToDate/StrToTime using the dialect's time mappings.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into exp.GroupConcat, normalizing
        Postgres/BigQuery ORDER BY/LIMIT tails and WITHIN GROUP (ORDER BY ...)."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> {USING <charset> | , <type>}) into a Cast/TryCast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing odd argument is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE matches NULL against NULL, unlike = which yields NULL
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: must cover both equality and the NULL = NULL case
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <key> {: | ,} [VALUE] <value> pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in FormatJson when a FORMAT JSON suffix follows."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT arguments: star or key/value pairs, plus NULL/UNIQUE KEYS,
        RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one JSON_TABLE column definition: name/type or NESTED, optional PATH and schema."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table() ->
exp.JSONTable: 4569 this = self._parse_format_json(self._parse_bitwise()) 4570 path = self._match(TokenType.COMMA) and self._parse_string() 4571 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4572 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4573 schema = self._parse_json_schema() 4574 4575 return exp.JSONTable( 4576 this=this, 4577 schema=schema, 4578 path=path, 4579 error_handling=error_handling, 4580 empty_handling=empty_handling, 4581 ) 4582 4583 def _parse_match_against(self) -> exp.MatchAgainst: 4584 expressions = self._parse_csv(self._parse_column) 4585 4586 self._match_text_seq(")", "AGAINST", "(") 4587 4588 this = self._parse_string() 4589 4590 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4591 modifier = "IN NATURAL LANGUAGE MODE" 4592 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4593 modifier = f"{modifier} WITH QUERY EXPANSION" 4594 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4595 modifier = "IN BOOLEAN MODE" 4596 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4597 modifier = "WITH QUERY EXPANSION" 4598 else: 4599 modifier = None 4600 4601 return self.expression( 4602 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4603 ) 4604 4605 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4606 def _parse_open_json(self) -> exp.OpenJSON: 4607 this = self._parse_bitwise() 4608 path = self._match(TokenType.COMMA) and self._parse_string() 4609 4610 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4611 this = self._parse_field(any_token=True) 4612 kind = self._parse_types() 4613 path = self._parse_string() 4614 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4615 4616 return self.expression( 4617 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4618 ) 4619 4620 expressions = None 4621 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4622 
self._match_l_paren() 4623 expressions = self._parse_csv(_parse_open_json_column_def) 4624 4625 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4626 4627 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4628 args = self._parse_csv(self._parse_bitwise) 4629 4630 if self._match(TokenType.IN): 4631 return self.expression( 4632 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4633 ) 4634 4635 if haystack_first: 4636 haystack = seq_get(args, 0) 4637 needle = seq_get(args, 1) 4638 else: 4639 needle = seq_get(args, 0) 4640 haystack = seq_get(args, 1) 4641 4642 return self.expression( 4643 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4644 ) 4645 4646 def _parse_predict(self) -> exp.Predict: 4647 self._match_text_seq("MODEL") 4648 this = self._parse_table() 4649 4650 self._match(TokenType.COMMA) 4651 self._match_text_seq("TABLE") 4652 4653 return self.expression( 4654 exp.Predict, 4655 this=this, 4656 expression=self._parse_table(), 4657 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4658 ) 4659 4660 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4661 args = self._parse_csv(self._parse_table) 4662 return exp.JoinHint(this=func_name.upper(), expressions=args) 4663 4664 def _parse_substring(self) -> exp.Substring: 4665 # Postgres supports the form: substring(string [from int] [for int]) 4666 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4667 4668 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4669 4670 if self._match(TokenType.FROM): 4671 args.append(self._parse_bitwise()) 4672 if self._match(TokenType.FOR): 4673 args.append(self._parse_bitwise()) 4674 4675 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4676 4677 def _parse_trim(self) -> exp.Trim: 4678 # https://www.w3resource.com/sql/character-functions/trim.php 4679 # 
https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4680 4681 position = None 4682 collation = None 4683 expression = None 4684 4685 if self._match_texts(self.TRIM_TYPES): 4686 position = self._prev.text.upper() 4687 4688 this = self._parse_bitwise() 4689 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4690 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4691 expression = self._parse_bitwise() 4692 4693 if invert_order: 4694 this, expression = expression, this 4695 4696 if self._match(TokenType.COLLATE): 4697 collation = self._parse_bitwise() 4698 4699 return self.expression( 4700 exp.Trim, this=this, position=position, expression=expression, collation=collation 4701 ) 4702 4703 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4704 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4705 4706 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4707 return self._parse_window(self._parse_id_var(), alias=True) 4708 4709 def _parse_respect_or_ignore_nulls( 4710 self, this: t.Optional[exp.Expression] 4711 ) -> t.Optional[exp.Expression]: 4712 if self._match_text_seq("IGNORE", "NULLS"): 4713 return self.expression(exp.IgnoreNulls, this=this) 4714 if self._match_text_seq("RESPECT", "NULLS"): 4715 return self.expression(exp.RespectNulls, this=this) 4716 return this 4717 4718 def _parse_window( 4719 self, this: t.Optional[exp.Expression], alias: bool = False 4720 ) -> t.Optional[exp.Expression]: 4721 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4722 self._match(TokenType.WHERE) 4723 this = self.expression( 4724 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4725 ) 4726 self._match_r_paren() 4727 4728 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4729 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4730 if self._match_text_seq("WITHIN", "GROUP"): 4731 order = self._parse_wrapped(self._parse_order) 4732 this = self.expression(exp.WithinGroup, this=this, expression=order) 4733 4734 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4735 # Some dialects choose to implement and some do not. 4736 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4737 4738 # There is some code above in _parse_lambda that handles 4739 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4740 4741 # The below changes handle 4742 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4743 4744 # Oracle allows both formats 4745 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4746 # and Snowflake chose to do the same for familiarity 4747 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4748 this = self._parse_respect_or_ignore_nulls(this) 4749 4750 # bigquery select from window x AS (partition by ...) 
4751 if alias: 4752 over = None 4753 self._match(TokenType.ALIAS) 4754 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4755 return this 4756 else: 4757 over = self._prev.text.upper() 4758 4759 if not self._match(TokenType.L_PAREN): 4760 return self.expression( 4761 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4762 ) 4763 4764 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4765 4766 first = self._match(TokenType.FIRST) 4767 if self._match_text_seq("LAST"): 4768 first = False 4769 4770 partition, order = self._parse_partition_and_order() 4771 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4772 4773 if kind: 4774 self._match(TokenType.BETWEEN) 4775 start = self._parse_window_spec() 4776 self._match(TokenType.AND) 4777 end = self._parse_window_spec() 4778 4779 spec = self.expression( 4780 exp.WindowSpec, 4781 kind=kind, 4782 start=start["value"], 4783 start_side=start["side"], 4784 end=end["value"], 4785 end_side=end["side"], 4786 ) 4787 else: 4788 spec = None 4789 4790 self._match_r_paren() 4791 4792 window = self.expression( 4793 exp.Window, 4794 this=this, 4795 partition_by=partition, 4796 order=order, 4797 spec=spec, 4798 alias=window_alias, 4799 over=over, 4800 first=first, 4801 ) 4802 4803 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4804 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4805 return self._parse_window(window, alias=alias) 4806 4807 return window 4808 4809 def _parse_partition_and_order( 4810 self, 4811 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4812 return self._parse_partition_by(), self._parse_order() 4813 4814 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4815 self._match(TokenType.BETWEEN) 4816 4817 return { 4818 "value": ( 4819 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4820 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4821 or self._parse_bitwise() 4822 ), 4823 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4824 } 4825 4826 def _parse_alias( 4827 self, this: t.Optional[exp.Expression], explicit: bool = False 4828 ) -> t.Optional[exp.Expression]: 4829 any_token = self._match(TokenType.ALIAS) 4830 comments = self._prev_comments 4831 4832 if explicit and not any_token: 4833 return this 4834 4835 if self._match(TokenType.L_PAREN): 4836 aliases = self.expression( 4837 exp.Aliases, 4838 comments=comments, 4839 this=this, 4840 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4841 ) 4842 self._match_r_paren(aliases) 4843 return aliases 4844 4845 alias = self._parse_id_var(any_token) 4846 4847 if alias: 4848 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4849 4850 return this 4851 4852 def _parse_id_var( 4853 self, 4854 any_token: bool = True, 4855 tokens: t.Optional[t.Collection[TokenType]] = None, 4856 ) -> t.Optional[exp.Expression]: 4857 identifier = self._parse_identifier() 4858 4859 if identifier: 4860 return identifier 4861 4862 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4863 quoted = self._prev.token_type == TokenType.STRING 4864 return exp.Identifier(this=self._prev.text, quoted=quoted) 4865 4866 return None 4867 4868 def _parse_string(self) -> 
t.Optional[exp.Expression]: 4869 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4870 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4871 return self._parse_placeholder() 4872 4873 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4874 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4875 4876 def _parse_number(self) -> t.Optional[exp.Expression]: 4877 if self._match(TokenType.NUMBER): 4878 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4879 return self._parse_placeholder() 4880 4881 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4882 if self._match(TokenType.IDENTIFIER): 4883 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4884 return self._parse_placeholder() 4885 4886 def _parse_var( 4887 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4888 ) -> t.Optional[exp.Expression]: 4889 if ( 4890 (any_token and self._advance_any()) 4891 or self._match(TokenType.VAR) 4892 or (self._match_set(tokens) if tokens else False) 4893 ): 4894 return self.expression(exp.Var, this=self._prev.text) 4895 return self._parse_placeholder() 4896 4897 def _advance_any(self) -> t.Optional[Token]: 4898 if self._curr and self._curr.token_type not in self.RESERVED_TOKENS: 4899 self._advance() 4900 return self._prev 4901 return None 4902 4903 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4904 return self._parse_var() or self._parse_string() 4905 4906 def _parse_null(self) -> t.Optional[exp.Expression]: 4907 if self._match_set(self.NULL_TOKENS): 4908 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4909 return self._parse_placeholder() 4910 4911 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4912 if self._match(TokenType.TRUE): 4913 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4914 if self._match(TokenType.FALSE): 4915 return 
self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4916 return self._parse_placeholder() 4917 4918 def _parse_star(self) -> t.Optional[exp.Expression]: 4919 if self._match(TokenType.STAR): 4920 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4921 return self._parse_placeholder() 4922 4923 def _parse_parameter(self) -> exp.Parameter: 4924 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4925 return ( 4926 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4927 ) 4928 4929 self._match(TokenType.L_BRACE) 4930 this = _parse_parameter_part() 4931 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4932 self._match(TokenType.R_BRACE) 4933 4934 return self.expression(exp.Parameter, this=this, expression=expression) 4935 4936 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4937 if self._match_set(self.PLACEHOLDER_PARSERS): 4938 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4939 if placeholder: 4940 return placeholder 4941 self._advance(-1) 4942 return None 4943 4944 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4945 if not self._match(TokenType.EXCEPT): 4946 return None 4947 if self._match(TokenType.L_PAREN, advance=False): 4948 return self._parse_wrapped_csv(self._parse_column) 4949 4950 except_column = self._parse_column() 4951 return [except_column] if except_column else None 4952 4953 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4954 if not self._match(TokenType.REPLACE): 4955 return None 4956 if self._match(TokenType.L_PAREN, advance=False): 4957 return self._parse_wrapped_csv(self._parse_expression) 4958 4959 replace_expression = self._parse_expression() 4960 return [replace_expression] if replace_expression else None 4961 4962 def _parse_csv( 4963 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4964 ) -> t.List[exp.Expression]: 4965 parse_result = parse_method() 4966 items = [parse_result] if 
parse_result is not None else [] 4967 4968 while self._match(sep): 4969 self._add_comments(parse_result) 4970 parse_result = parse_method() 4971 if parse_result is not None: 4972 items.append(parse_result) 4973 4974 return items 4975 4976 def _parse_tokens( 4977 self, parse_method: t.Callable, expressions: t.Dict 4978 ) -> t.Optional[exp.Expression]: 4979 this = parse_method() 4980 4981 while self._match_set(expressions): 4982 this = self.expression( 4983 expressions[self._prev.token_type], 4984 this=this, 4985 comments=self._prev_comments, 4986 expression=parse_method(), 4987 ) 4988 4989 return this 4990 4991 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4992 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4993 4994 def _parse_wrapped_csv( 4995 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4996 ) -> t.List[exp.Expression]: 4997 return self._parse_wrapped( 4998 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4999 ) 5000 5001 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5002 wrapped = self._match(TokenType.L_PAREN) 5003 if not wrapped and not optional: 5004 self.raise_error("Expecting (") 5005 parse_result = parse_method() 5006 if wrapped: 5007 self._match_r_paren() 5008 return parse_result 5009 5010 def _parse_expressions(self) -> t.List[exp.Expression]: 5011 return self._parse_csv(self._parse_expression) 5012 5013 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5014 return self._parse_select() or self._parse_set_operations( 5015 self._parse_expression() if alias else self._parse_conjunction() 5016 ) 5017 5018 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5019 return self._parse_query_modifiers( 5020 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5021 ) 5022 5023 def _parse_transaction(self) -> exp.Transaction | 
exp.Command: 5024 this = None 5025 if self._match_texts(self.TRANSACTION_KIND): 5026 this = self._prev.text 5027 5028 self._match_texts(("TRANSACTION", "WORK")) 5029 5030 modes = [] 5031 while True: 5032 mode = [] 5033 while self._match(TokenType.VAR): 5034 mode.append(self._prev.text) 5035 5036 if mode: 5037 modes.append(" ".join(mode)) 5038 if not self._match(TokenType.COMMA): 5039 break 5040 5041 return self.expression(exp.Transaction, this=this, modes=modes) 5042 5043 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5044 chain = None 5045 savepoint = None 5046 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5047 5048 self._match_texts(("TRANSACTION", "WORK")) 5049 5050 if self._match_text_seq("TO"): 5051 self._match_text_seq("SAVEPOINT") 5052 savepoint = self._parse_id_var() 5053 5054 if self._match(TokenType.AND): 5055 chain = not self._match_text_seq("NO") 5056 self._match_text_seq("CHAIN") 5057 5058 if is_rollback: 5059 return self.expression(exp.Rollback, savepoint=savepoint) 5060 5061 return self.expression(exp.Commit, chain=chain) 5062 5063 def _parse_refresh(self) -> exp.Refresh: 5064 self._match(TokenType.TABLE) 5065 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5066 5067 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5068 if not self._match_text_seq("ADD"): 5069 return None 5070 5071 self._match(TokenType.COLUMN) 5072 exists_column = self._parse_exists(not_=True) 5073 expression = self._parse_field_def() 5074 5075 if expression: 5076 expression.set("exists", exists_column) 5077 5078 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5079 if self._match_texts(("FIRST", "AFTER")): 5080 position = self._prev.text 5081 column_position = self.expression( 5082 exp.ColumnPosition, this=self._parse_column(), position=position 5083 ) 5084 expression.set("position", column_position) 5085 5086 return expression 5087 5088 def 
_parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5089 drop = self._match(TokenType.DROP) and self._parse_drop() 5090 if drop and not isinstance(drop, exp.Command): 5091 drop.set("kind", drop.args.get("kind", "COLUMN")) 5092 return drop 5093 5094 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5095 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5096 return self.expression( 5097 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5098 ) 5099 5100 def _parse_add_constraint(self) -> exp.AddConstraint: 5101 this = None 5102 kind = self._prev.token_type 5103 5104 if kind == TokenType.CONSTRAINT: 5105 this = self._parse_id_var() 5106 5107 if self._match_text_seq("CHECK"): 5108 expression = self._parse_wrapped(self._parse_conjunction) 5109 enforced = self._match_text_seq("ENFORCED") 5110 5111 return self.expression( 5112 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5113 ) 5114 5115 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5116 expression = self._parse_foreign_key() 5117 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5118 expression = self._parse_primary_key() 5119 else: 5120 expression = None 5121 5122 return self.expression(exp.AddConstraint, this=this, expression=expression) 5123 5124 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5125 index = self._index - 1 5126 5127 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5128 return self._parse_csv(self._parse_add_constraint) 5129 5130 self._retreat(index) 5131 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5132 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5133 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5134 5135 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5136 self._match(TokenType.COLUMN) 5137 column = 
self._parse_field(any_token=True) 5138 5139 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5140 return self.expression(exp.AlterColumn, this=column, drop=True) 5141 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5142 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5143 5144 self._match_text_seq("SET", "DATA") 5145 return self.expression( 5146 exp.AlterColumn, 5147 this=column, 5148 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5149 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5150 using=self._match(TokenType.USING) and self._parse_conjunction(), 5151 ) 5152 5153 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5154 index = self._index - 1 5155 5156 partition_exists = self._parse_exists() 5157 if self._match(TokenType.PARTITION, advance=False): 5158 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5159 5160 self._retreat(index) 5161 return self._parse_csv(self._parse_drop_column) 5162 5163 def _parse_alter_table_rename(self) -> exp.RenameTable: 5164 self._match_text_seq("TO") 5165 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5166 5167 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5168 start = self._prev 5169 5170 if not self._match(TokenType.TABLE): 5171 return self._parse_as_command(start) 5172 5173 exists = self._parse_exists() 5174 only = self._match_text_seq("ONLY") 5175 this = self._parse_table(schema=True) 5176 5177 if self._next: 5178 self._advance() 5179 5180 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5181 if parser: 5182 actions = ensure_list(parser(self)) 5183 5184 if not self._curr: 5185 return self.expression( 5186 exp.AlterTable, 5187 this=this, 5188 exists=exists, 5189 actions=actions, 5190 only=only, 5191 ) 5192 5193 return self._parse_as_command(start) 5194 5195 def _parse_merge(self) -> exp.Merge: 5196 self._match(TokenType.INTO) 5197 
target = self._parse_table() 5198 5199 if target and self._match(TokenType.ALIAS, advance=False): 5200 target.set("alias", self._parse_table_alias()) 5201 5202 self._match(TokenType.USING) 5203 using = self._parse_table() 5204 5205 self._match(TokenType.ON) 5206 on = self._parse_conjunction() 5207 5208 return self.expression( 5209 exp.Merge, 5210 this=target, 5211 using=using, 5212 on=on, 5213 expressions=self._parse_when_matched(), 5214 ) 5215 5216 def _parse_when_matched(self) -> t.List[exp.When]: 5217 whens = [] 5218 5219 while self._match(TokenType.WHEN): 5220 matched = not self._match(TokenType.NOT) 5221 self._match_text_seq("MATCHED") 5222 source = ( 5223 False 5224 if self._match_text_seq("BY", "TARGET") 5225 else self._match_text_seq("BY", "SOURCE") 5226 ) 5227 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5228 5229 self._match(TokenType.THEN) 5230 5231 if self._match(TokenType.INSERT): 5232 _this = self._parse_star() 5233 if _this: 5234 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5235 else: 5236 then = self.expression( 5237 exp.Insert, 5238 this=self._parse_value(), 5239 expression=self._match(TokenType.VALUES) and self._parse_value(), 5240 ) 5241 elif self._match(TokenType.UPDATE): 5242 expressions = self._parse_star() 5243 if expressions: 5244 then = self.expression(exp.Update, expressions=expressions) 5245 else: 5246 then = self.expression( 5247 exp.Update, 5248 expressions=self._match(TokenType.SET) 5249 and self._parse_csv(self._parse_equality), 5250 ) 5251 elif self._match(TokenType.DELETE): 5252 then = self.expression(exp.Var, this=self._prev.text) 5253 else: 5254 then = None 5255 5256 whens.append( 5257 self.expression( 5258 exp.When, 5259 matched=matched, 5260 source=source, 5261 condition=condition, 5262 then=then, 5263 ) 5264 ) 5265 return whens 5266 5267 def _parse_show(self) -> t.Optional[exp.Expression]: 5268 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5269 if 
        if parser:  # NOTE(review): leading "if" reconstructed — the start of this statement was truncated by chunking
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET assignment of the form `name = value` or `name TO value`.

        Args:
            kind: Optional modifier, e.g. "GLOBAL" or "SESSION".

        Returns:
            A SetItem expression, or None if no assignment could be parsed
            (the token index is restored in that case).
        """
        index = self._index

        # GLOBAL/SESSION TRANSACTION is a special form, delegated entirely.
        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        # Some dialects require an explicit '='/'TO' between name and value.
        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it can't be passed as a bare kwarg.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement, preferring dialect-specific SET parsers."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to an opaque Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # Tokens remaining after the CSV parse mean the statement wasn't fully
        # understood, so rewind and treat the whole thing as a raw command.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first (possibly multi-word) option that matches the upcoming tokens."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume every remaining token and wrap the raw SQL in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        # Split the raw SQL into the leading command keyword and its payload.
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form KIND(subkey value ...) inside parentheses."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Collect key/value sub-properties until neither a key nor a value is found.
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse (MIN <x> MAX <y>) or (MAX <y>); MIN defaults to the literal 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: min/max shadow the builtins; kept as-is for compatibility.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail: `<expr> IN <iterator> [IF <condition>]`."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # index - 1 also backs past the token consumed before entering here.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the token stream against a trie of (possibly multi-word) parser keys.

        Returns:
            The parser registered for the longest matching key, or None
            (the token index is restored on failure).
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (and optionally advance) if the current token has the given type."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach any buffered comments to the expression being built.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but succeeds if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive token types; advances past both on success."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against a collection of texts."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts, all-or-nothing.

        On any mismatch the token index is fully restored; with advance=False
        the index is restored even on success (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters with plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost Dot chain that wraps this column, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initialize the parser; see the class docstring for argument semantics."""
        # Local import — presumably to avoid a circular import with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1013 def parse( 1014 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1015 ) -> t.List[t.Optional[exp.Expression]]: 1016 """ 1017 Parses a list of tokens and returns a list of syntax trees, one tree 1018 per parsed SQL statement. 1019 1020 Args: 1021 raw_tokens: The list of tokens. 1022 sql: The original SQL string, used to produce helpful debug messages. 1023 1024 Returns: 1025 The list of the produced syntax trees. 1026 """ 1027 return self._parse( 1028 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1029 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1031 def parse_into( 1032 self, 1033 expression_types: exp.IntoType, 1034 raw_tokens: t.List[Token], 1035 sql: t.Optional[str] = None, 1036 ) -> t.List[t.Optional[exp.Expression]]: 1037 """ 1038 Parses a list of tokens into a given Expression type. If a collection of Expression 1039 types is given instead, this method will try to parse the token list into each one 1040 of them, stopping at the first for which the parsing succeeds. 1041 1042 Args: 1043 expression_types: The expression type(s) to try and parse the token list into. 1044 raw_tokens: The list of tokens. 1045 sql: The original SQL string, used to produce helpful debug messages. 1046 1047 Returns: 1048 The target Expression. 1049 """ 1050 errors = [] 1051 for expression_type in ensure_list(expression_types): 1052 parser = self.EXPRESSION_PARSERS.get(expression_type) 1053 if not parser: 1054 raise TypeError(f"No parser registered for {expression_type}") 1055 1056 try: 1057 return self._parse(parser, raw_tokens, sql) 1058 except ParseError as e: 1059 e.errors[0]["into_expression"] = expression_type 1060 errors.append(e) 1061 1062 raise ParseError( 1063 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1064 errors=merge_errors(errors), 1065 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1102 def check_errors(self) -> None: 1103 """Logs or raises any found errors, depending on the chosen error level setting.""" 1104 if self.error_level == ErrorLevel.WARN: 1105 for error in self.errors: 1106 logger.error(str(error)) 1107 elif self.error_level == ErrorLevel.RAISE and self.errors: 1108 raise ParseError( 1109 concat_messages(self.errors, self.max_errors), 1110 errors=merge_errors(self.errors), 1111 )
Logs or raises any found errors, depending on the chosen error level setting.
1113 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1114 """ 1115 Appends an error in the list of recorded errors or raises it, depending on the chosen 1116 error level setting. 1117 """ 1118 token = token or self._curr or self._prev or Token.string("") 1119 start = token.start 1120 end = token.end + 1 1121 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1122 highlight = self.sql[start:end] 1123 end_context = self.sql[end : end + self.error_message_context] 1124 1125 error = ParseError.new( 1126 f"{message}. Line {token.line}, Col: {token.col}.\n" 1127 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1128 description=message, 1129 line=token.line, 1130 col=token.col, 1131 start_context=start_context, 1132 highlight=highlight, 1133 end_context=end_context, 1134 ) 1135 1136 if self.error_level == ErrorLevel.IMMEDIATE: 1137 raise error 1138 1139 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1141 def expression( 1142 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1143 ) -> E: 1144 """ 1145 Creates a new, validated Expression. 1146 1147 Args: 1148 exp_class: The expression class to instantiate. 1149 comments: An optional list of comments to attach to the expression. 1150 kwargs: The arguments to set for the expression along with their respective values. 1151 1152 Returns: 1153 The target expression. 1154 """ 1155 instance = exp_class(**kwargs) 1156 instance.add_comments(comments) if comments else self._add_comments(instance) 1157 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1164 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1165 """ 1166 Validates an Expression, making sure that all its mandatory arguments are set. 1167 1168 Args: 1169 expression: The expression to validate. 1170 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1171 1172 Returns: 1173 The validated expression. 1174 """ 1175 if self.error_level != ErrorLevel.IGNORE: 1176 for error_message in expression.error_messages(args): 1177 self.raise_error(error_message) 1178 1179 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.