# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63class _Parser(type): 64 def __new__(cls, clsname, bases, attrs): 65 klass = 
super().__new__(cls, clsname, bases, attrs) 66 67 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 68 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 69 70 return klass 71 72 73class Parser(metaclass=_Parser): 74 """ 75 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 76 77 Args: 78 error_level: The desired error level. 79 Default: ErrorLevel.IMMEDIATE 80 error_message_context: Determines the amount of context to capture from a 81 query string when displaying the error message (in number of characters). 82 Default: 100 83 max_errors: Maximum number of error messages to include in a raised ParseError. 84 This is only relevant if error_level is ErrorLevel.RAISE. 85 Default: 3 86 """ 87 88 FUNCTIONS: t.Dict[str, t.Callable] = { 89 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 90 "CONCAT": lambda args, dialect: exp.Concat( 91 expressions=args, 92 safe=not dialect.STRICT_STRING_CONCAT, 93 coalesce=dialect.CONCAT_COALESCE, 94 ), 95 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 96 expressions=args, 97 safe=not dialect.STRICT_STRING_CONCAT, 98 coalesce=dialect.CONCAT_COALESCE, 99 ), 100 "DATE_TO_DATE_STR": lambda args: exp.Cast( 101 this=seq_get(args, 0), 102 to=exp.DataType(this=exp.DataType.Type.TEXT), 103 ), 104 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 105 "LIKE": parse_like, 106 "LOG": parse_logarithm, 107 "TIME_TO_TIME_STR": lambda args: exp.Cast( 108 this=seq_get(args, 0), 109 to=exp.DataType(this=exp.DataType.Type.TEXT), 110 ), 111 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 112 this=exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 start=exp.Literal.number(1), 117 length=exp.Literal.number(10), 118 ), 119 "VAR_MAP": parse_var_map, 120 } 121 122 NO_PAREN_FUNCTIONS = { 123 TokenType.CURRENT_DATE: exp.CurrentDate, 124 TokenType.CURRENT_DATETIME: 
exp.CurrentDate, 125 TokenType.CURRENT_TIME: exp.CurrentTime, 126 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 127 TokenType.CURRENT_USER: exp.CurrentUser, 128 } 129 130 STRUCT_TYPE_TOKENS = { 131 TokenType.NESTED, 132 TokenType.STRUCT, 133 } 134 135 NESTED_TYPE_TOKENS = { 136 TokenType.ARRAY, 137 TokenType.LOWCARDINALITY, 138 TokenType.MAP, 139 TokenType.NULLABLE, 140 *STRUCT_TYPE_TOKENS, 141 } 142 143 ENUM_TYPE_TOKENS = { 144 TokenType.ENUM, 145 TokenType.ENUM8, 146 TokenType.ENUM16, 147 } 148 149 TYPE_TOKENS = { 150 TokenType.BIT, 151 TokenType.BOOLEAN, 152 TokenType.TINYINT, 153 TokenType.UTINYINT, 154 TokenType.SMALLINT, 155 TokenType.USMALLINT, 156 TokenType.INT, 157 TokenType.UINT, 158 TokenType.BIGINT, 159 TokenType.UBIGINT, 160 TokenType.INT128, 161 TokenType.UINT128, 162 TokenType.INT256, 163 TokenType.UINT256, 164 TokenType.MEDIUMINT, 165 TokenType.UMEDIUMINT, 166 TokenType.FIXEDSTRING, 167 TokenType.FLOAT, 168 TokenType.DOUBLE, 169 TokenType.CHAR, 170 TokenType.NCHAR, 171 TokenType.VARCHAR, 172 TokenType.NVARCHAR, 173 TokenType.TEXT, 174 TokenType.MEDIUMTEXT, 175 TokenType.LONGTEXT, 176 TokenType.MEDIUMBLOB, 177 TokenType.LONGBLOB, 178 TokenType.BINARY, 179 TokenType.VARBINARY, 180 TokenType.JSON, 181 TokenType.JSONB, 182 TokenType.INTERVAL, 183 TokenType.TINYBLOB, 184 TokenType.TINYTEXT, 185 TokenType.TIME, 186 TokenType.TIMETZ, 187 TokenType.TIMESTAMP, 188 TokenType.TIMESTAMP_S, 189 TokenType.TIMESTAMP_MS, 190 TokenType.TIMESTAMP_NS, 191 TokenType.TIMESTAMPTZ, 192 TokenType.TIMESTAMPLTZ, 193 TokenType.DATETIME, 194 TokenType.DATETIME64, 195 TokenType.DATE, 196 TokenType.INT4RANGE, 197 TokenType.INT4MULTIRANGE, 198 TokenType.INT8RANGE, 199 TokenType.INT8MULTIRANGE, 200 TokenType.NUMRANGE, 201 TokenType.NUMMULTIRANGE, 202 TokenType.TSRANGE, 203 TokenType.TSMULTIRANGE, 204 TokenType.TSTZRANGE, 205 TokenType.TSTZMULTIRANGE, 206 TokenType.DATERANGE, 207 TokenType.DATEMULTIRANGE, 208 TokenType.DECIMAL, 209 TokenType.UDECIMAL, 210 TokenType.BIGDECIMAL, 
211 TokenType.UUID, 212 TokenType.GEOGRAPHY, 213 TokenType.GEOMETRY, 214 TokenType.HLLSKETCH, 215 TokenType.HSTORE, 216 TokenType.PSEUDO_TYPE, 217 TokenType.SUPER, 218 TokenType.SERIAL, 219 TokenType.SMALLSERIAL, 220 TokenType.BIGSERIAL, 221 TokenType.XML, 222 TokenType.YEAR, 223 TokenType.UNIQUEIDENTIFIER, 224 TokenType.USERDEFINED, 225 TokenType.MONEY, 226 TokenType.SMALLMONEY, 227 TokenType.ROWVERSION, 228 TokenType.IMAGE, 229 TokenType.VARIANT, 230 TokenType.OBJECT, 231 TokenType.OBJECT_IDENTIFIER, 232 TokenType.INET, 233 TokenType.IPADDRESS, 234 TokenType.IPPREFIX, 235 TokenType.UNKNOWN, 236 TokenType.NULL, 237 *ENUM_TYPE_TOKENS, 238 *NESTED_TYPE_TOKENS, 239 } 240 241 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 242 TokenType.BIGINT: TokenType.UBIGINT, 243 TokenType.INT: TokenType.UINT, 244 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 245 TokenType.SMALLINT: TokenType.USMALLINT, 246 TokenType.TINYINT: TokenType.UTINYINT, 247 TokenType.DECIMAL: TokenType.UDECIMAL, 248 } 249 250 SUBQUERY_PREDICATES = { 251 TokenType.ANY: exp.Any, 252 TokenType.ALL: exp.All, 253 TokenType.EXISTS: exp.Exists, 254 TokenType.SOME: exp.Any, 255 } 256 257 RESERVED_TOKENS = { 258 *Tokenizer.SINGLE_TOKENS.values(), 259 TokenType.SELECT, 260 } 261 262 DB_CREATABLES = { 263 TokenType.DATABASE, 264 TokenType.SCHEMA, 265 TokenType.TABLE, 266 TokenType.VIEW, 267 TokenType.MODEL, 268 TokenType.DICTIONARY, 269 } 270 271 CREATABLES = { 272 TokenType.COLUMN, 273 TokenType.CONSTRAINT, 274 TokenType.FUNCTION, 275 TokenType.INDEX, 276 TokenType.PROCEDURE, 277 TokenType.FOREIGN_KEY, 278 *DB_CREATABLES, 279 } 280 281 # Tokens that can represent identifiers 282 ID_VAR_TOKENS = { 283 TokenType.VAR, 284 TokenType.ANTI, 285 TokenType.APPLY, 286 TokenType.ASC, 287 TokenType.AUTO_INCREMENT, 288 TokenType.BEGIN, 289 TokenType.CACHE, 290 TokenType.CASE, 291 TokenType.COLLATE, 292 TokenType.COMMAND, 293 TokenType.COMMENT, 294 TokenType.COMMIT, 295 TokenType.CONSTRAINT, 296 TokenType.DEFAULT, 297 TokenType.DELETE, 298 
TokenType.DESC, 299 TokenType.DESCRIBE, 300 TokenType.DICTIONARY, 301 TokenType.DIV, 302 TokenType.END, 303 TokenType.EXECUTE, 304 TokenType.ESCAPE, 305 TokenType.FALSE, 306 TokenType.FIRST, 307 TokenType.FILTER, 308 TokenType.FORMAT, 309 TokenType.FULL, 310 TokenType.IS, 311 TokenType.ISNULL, 312 TokenType.INTERVAL, 313 TokenType.KEEP, 314 TokenType.KILL, 315 TokenType.LEFT, 316 TokenType.LOAD, 317 TokenType.MERGE, 318 TokenType.NATURAL, 319 TokenType.NEXT, 320 TokenType.OFFSET, 321 TokenType.OPERATOR, 322 TokenType.ORDINALITY, 323 TokenType.OVERLAPS, 324 TokenType.OVERWRITE, 325 TokenType.PARTITION, 326 TokenType.PERCENT, 327 TokenType.PIVOT, 328 TokenType.PRAGMA, 329 TokenType.RANGE, 330 TokenType.RECURSIVE, 331 TokenType.REFERENCES, 332 TokenType.REFRESH, 333 TokenType.REPLACE, 334 TokenType.RIGHT, 335 TokenType.ROW, 336 TokenType.ROWS, 337 TokenType.SEMI, 338 TokenType.SET, 339 TokenType.SETTINGS, 340 TokenType.SHOW, 341 TokenType.TEMPORARY, 342 TokenType.TOP, 343 TokenType.TRUE, 344 TokenType.UNIQUE, 345 TokenType.UNPIVOT, 346 TokenType.UPDATE, 347 TokenType.USE, 348 TokenType.VOLATILE, 349 TokenType.WINDOW, 350 *CREATABLES, 351 *SUBQUERY_PREDICATES, 352 *TYPE_TOKENS, 353 *NO_PAREN_FUNCTIONS, 354 } 355 356 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 357 358 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 359 TokenType.ANTI, 360 TokenType.APPLY, 361 TokenType.ASOF, 362 TokenType.FULL, 363 TokenType.LEFT, 364 TokenType.LOCK, 365 TokenType.NATURAL, 366 TokenType.OFFSET, 367 TokenType.RIGHT, 368 TokenType.SEMI, 369 TokenType.WINDOW, 370 } 371 372 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 373 374 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 375 376 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 377 378 FUNC_TOKENS = { 379 TokenType.COLLATE, 380 TokenType.COMMAND, 381 TokenType.CURRENT_DATE, 382 TokenType.CURRENT_DATETIME, 383 TokenType.CURRENT_TIMESTAMP, 384 TokenType.CURRENT_TIME, 385 TokenType.CURRENT_USER, 386 TokenType.FILTER, 387 
TokenType.FIRST, 388 TokenType.FORMAT, 389 TokenType.GLOB, 390 TokenType.IDENTIFIER, 391 TokenType.INDEX, 392 TokenType.ISNULL, 393 TokenType.ILIKE, 394 TokenType.INSERT, 395 TokenType.LIKE, 396 TokenType.MERGE, 397 TokenType.OFFSET, 398 TokenType.PRIMARY_KEY, 399 TokenType.RANGE, 400 TokenType.REPLACE, 401 TokenType.RLIKE, 402 TokenType.ROW, 403 TokenType.UNNEST, 404 TokenType.VAR, 405 TokenType.LEFT, 406 TokenType.RIGHT, 407 TokenType.DATE, 408 TokenType.DATETIME, 409 TokenType.TABLE, 410 TokenType.TIMESTAMP, 411 TokenType.TIMESTAMPTZ, 412 TokenType.WINDOW, 413 TokenType.XOR, 414 *TYPE_TOKENS, 415 *SUBQUERY_PREDICATES, 416 } 417 418 CONJUNCTION = { 419 TokenType.AND: exp.And, 420 TokenType.OR: exp.Or, 421 } 422 423 EQUALITY = { 424 TokenType.COLON_EQ: exp.PropertyEQ, 425 TokenType.EQ: exp.EQ, 426 TokenType.NEQ: exp.NEQ, 427 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 428 } 429 430 COMPARISON = { 431 TokenType.GT: exp.GT, 432 TokenType.GTE: exp.GTE, 433 TokenType.LT: exp.LT, 434 TokenType.LTE: exp.LTE, 435 } 436 437 BITWISE = { 438 TokenType.AMP: exp.BitwiseAnd, 439 TokenType.CARET: exp.BitwiseXor, 440 TokenType.PIPE: exp.BitwiseOr, 441 } 442 443 TERM = { 444 TokenType.DASH: exp.Sub, 445 TokenType.PLUS: exp.Add, 446 TokenType.MOD: exp.Mod, 447 TokenType.COLLATE: exp.Collate, 448 } 449 450 FACTOR = { 451 TokenType.DIV: exp.IntDiv, 452 TokenType.LR_ARROW: exp.Distance, 453 TokenType.SLASH: exp.Div, 454 TokenType.STAR: exp.Mul, 455 } 456 457 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 458 459 TIMES = { 460 TokenType.TIME, 461 TokenType.TIMETZ, 462 } 463 464 TIMESTAMPS = { 465 TokenType.TIMESTAMP, 466 TokenType.TIMESTAMPTZ, 467 TokenType.TIMESTAMPLTZ, 468 *TIMES, 469 } 470 471 SET_OPERATIONS = { 472 TokenType.UNION, 473 TokenType.INTERSECT, 474 TokenType.EXCEPT, 475 } 476 477 JOIN_METHODS = { 478 TokenType.NATURAL, 479 TokenType.ASOF, 480 } 481 482 JOIN_SIDES = { 483 TokenType.LEFT, 484 TokenType.RIGHT, 485 TokenType.FULL, 486 } 487 488 JOIN_KINDS = { 489 
TokenType.INNER, 490 TokenType.OUTER, 491 TokenType.CROSS, 492 TokenType.SEMI, 493 TokenType.ANTI, 494 } 495 496 JOIN_HINTS: t.Set[str] = set() 497 498 LAMBDAS = { 499 TokenType.ARROW: lambda self, expressions: self.expression( 500 exp.Lambda, 501 this=self._replace_lambda( 502 self._parse_conjunction(), 503 {node.name for node in expressions}, 504 ), 505 expressions=expressions, 506 ), 507 TokenType.FARROW: lambda self, expressions: self.expression( 508 exp.Kwarg, 509 this=exp.var(expressions[0].name), 510 expression=self._parse_conjunction(), 511 ), 512 } 513 514 COLUMN_OPERATORS = { 515 TokenType.DOT: None, 516 TokenType.DCOLON: lambda self, this, to: self.expression( 517 exp.Cast if self.STRICT_CAST else exp.TryCast, 518 this=this, 519 to=to, 520 ), 521 TokenType.ARROW: lambda self, this, path: self.expression( 522 exp.JSONExtract, 523 this=this, 524 expression=path, 525 ), 526 TokenType.DARROW: lambda self, this, path: self.expression( 527 exp.JSONExtractScalar, 528 this=this, 529 expression=path, 530 ), 531 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 532 exp.JSONBExtract, 533 this=this, 534 expression=path, 535 ), 536 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 537 exp.JSONBExtractScalar, 538 this=this, 539 expression=path, 540 ), 541 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 542 exp.JSONBContains, 543 this=this, 544 expression=key, 545 ), 546 } 547 548 EXPRESSION_PARSERS = { 549 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 550 exp.Column: lambda self: self._parse_column(), 551 exp.Condition: lambda self: self._parse_conjunction(), 552 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 553 exp.Expression: lambda self: self._parse_statement(), 554 exp.From: lambda self: self._parse_from(), 555 exp.Group: lambda self: self._parse_group(), 556 exp.Having: lambda self: self._parse_having(), 557 exp.Identifier: lambda self: self._parse_id_var(), 558 
exp.Join: lambda self: self._parse_join(), 559 exp.Lambda: lambda self: self._parse_lambda(), 560 exp.Lateral: lambda self: self._parse_lateral(), 561 exp.Limit: lambda self: self._parse_limit(), 562 exp.Offset: lambda self: self._parse_offset(), 563 exp.Order: lambda self: self._parse_order(), 564 exp.Ordered: lambda self: self._parse_ordered(), 565 exp.Properties: lambda self: self._parse_properties(), 566 exp.Qualify: lambda self: self._parse_qualify(), 567 exp.Returning: lambda self: self._parse_returning(), 568 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 569 exp.Table: lambda self: self._parse_table_parts(), 570 exp.TableAlias: lambda self: self._parse_table_alias(), 571 exp.Where: lambda self: self._parse_where(), 572 exp.Window: lambda self: self._parse_named_window(), 573 exp.With: lambda self: self._parse_with(), 574 "JOIN_TYPE": lambda self: self._parse_join_parts(), 575 } 576 577 STATEMENT_PARSERS = { 578 TokenType.ALTER: lambda self: self._parse_alter(), 579 TokenType.BEGIN: lambda self: self._parse_transaction(), 580 TokenType.CACHE: lambda self: self._parse_cache(), 581 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 582 TokenType.COMMENT: lambda self: self._parse_comment(), 583 TokenType.CREATE: lambda self: self._parse_create(), 584 TokenType.DELETE: lambda self: self._parse_delete(), 585 TokenType.DESC: lambda self: self._parse_describe(), 586 TokenType.DESCRIBE: lambda self: self._parse_describe(), 587 TokenType.DROP: lambda self: self._parse_drop(), 588 TokenType.INSERT: lambda self: self._parse_insert(), 589 TokenType.KILL: lambda self: self._parse_kill(), 590 TokenType.LOAD: lambda self: self._parse_load(), 591 TokenType.MERGE: lambda self: self._parse_merge(), 592 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 593 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 594 TokenType.REFRESH: lambda self: self._parse_refresh(), 595 TokenType.ROLLBACK: 
lambda self: self._parse_commit_or_rollback(), 596 TokenType.SET: lambda self: self._parse_set(), 597 TokenType.UNCACHE: lambda self: self._parse_uncache(), 598 TokenType.UPDATE: lambda self: self._parse_update(), 599 TokenType.USE: lambda self: self.expression( 600 exp.Use, 601 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 602 and exp.var(self._prev.text), 603 this=self._parse_table(schema=False), 604 ), 605 } 606 607 UNARY_PARSERS = { 608 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 609 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 610 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 611 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 612 } 613 614 PRIMARY_PARSERS = { 615 TokenType.STRING: lambda self, token: self.expression( 616 exp.Literal, this=token.text, is_string=True 617 ), 618 TokenType.NUMBER: lambda self, token: self.expression( 619 exp.Literal, this=token.text, is_string=False 620 ), 621 TokenType.STAR: lambda self, _: self.expression( 622 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 623 ), 624 TokenType.NULL: lambda self, _: self.expression(exp.Null), 625 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 626 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 627 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 628 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 629 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 630 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 631 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 632 exp.National, this=token.text 633 ), 634 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, 
this=token.text), 635 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 636 exp.RawString, this=token.text 637 ), 638 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 639 } 640 641 PLACEHOLDER_PARSERS = { 642 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 643 TokenType.PARAMETER: lambda self: self._parse_parameter(), 644 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 645 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 646 else None, 647 } 648 649 RANGE_PARSERS = { 650 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 651 TokenType.GLOB: binary_range_parser(exp.Glob), 652 TokenType.ILIKE: binary_range_parser(exp.ILike), 653 TokenType.IN: lambda self, this: self._parse_in(this), 654 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 655 TokenType.IS: lambda self, this: self._parse_is(this), 656 TokenType.LIKE: binary_range_parser(exp.Like), 657 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 658 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 659 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 660 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 661 } 662 663 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 664 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 665 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 666 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 667 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 668 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 669 "CHECKSUM": lambda self: self._parse_checksum(), 670 "CLUSTER BY": lambda self: self._parse_cluster(), 671 "CLUSTERED": lambda self: self._parse_clustered_by(), 672 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 673 exp.CollateProperty, **kwargs 674 ), 675 
"COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 676 "COPY": lambda self: self._parse_copy_property(), 677 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 678 "DEFINER": lambda self: self._parse_definer(), 679 "DETERMINISTIC": lambda self: self.expression( 680 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 681 ), 682 "DISTKEY": lambda self: self._parse_distkey(), 683 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 684 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 685 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 686 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 687 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 688 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 689 "FREESPACE": lambda self: self._parse_freespace(), 690 "HEAP": lambda self: self.expression(exp.HeapProperty), 691 "IMMUTABLE": lambda self: self.expression( 692 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 693 ), 694 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 695 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 696 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 697 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 698 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 699 "LIKE": lambda self: self._parse_create_like(), 700 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 701 "LOCK": lambda self: self._parse_locking(), 702 "LOCKING": lambda self: self._parse_locking(), 703 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 704 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 705 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 706 
"MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 707 "NO": lambda self: self._parse_no_property(), 708 "ON": lambda self: self._parse_on_property(), 709 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 710 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 711 "PARTITION": lambda self: self._parse_partitioned_of(), 712 "PARTITION BY": lambda self: self._parse_partitioned_by(), 713 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 714 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 715 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 716 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 717 "REMOTE": lambda self: self._parse_remote_with_connection(), 718 "RETURNS": lambda self: self._parse_returns(), 719 "ROW": lambda self: self._parse_row(), 720 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 721 "SAMPLE": lambda self: self.expression( 722 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 723 ), 724 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 725 "SETTINGS": lambda self: self.expression( 726 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 727 ), 728 "SORTKEY": lambda self: self._parse_sortkey(), 729 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 730 "STABLE": lambda self: self.expression( 731 exp.StabilityProperty, this=exp.Literal.string("STABLE") 732 ), 733 "STORED": lambda self: self._parse_stored(), 734 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 735 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 736 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 737 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 738 "TO": lambda self: self._parse_to_table(), 739 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 
740 "TRANSFORM": lambda self: self.expression( 741 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 742 ), 743 "TTL": lambda self: self._parse_ttl(), 744 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 745 "VOLATILE": lambda self: self._parse_volatile_property(), 746 "WITH": lambda self: self._parse_with_property(), 747 } 748 749 CONSTRAINT_PARSERS = { 750 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 751 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 752 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 753 "CHARACTER SET": lambda self: self.expression( 754 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 755 ), 756 "CHECK": lambda self: self.expression( 757 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 758 ), 759 "COLLATE": lambda self: self.expression( 760 exp.CollateColumnConstraint, this=self._parse_var() 761 ), 762 "COMMENT": lambda self: self.expression( 763 exp.CommentColumnConstraint, this=self._parse_string() 764 ), 765 "COMPRESS": lambda self: self._parse_compress(), 766 "CLUSTERED": lambda self: self.expression( 767 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 768 ), 769 "NONCLUSTERED": lambda self: self.expression( 770 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 771 ), 772 "DEFAULT": lambda self: self.expression( 773 exp.DefaultColumnConstraint, this=self._parse_bitwise() 774 ), 775 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 776 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 777 "FORMAT": lambda self: self.expression( 778 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 779 ), 780 "GENERATED": lambda self: self._parse_generated_as_identity(), 781 "IDENTITY": lambda self: self._parse_auto_increment(), 782 "INLINE": lambda 
self: self._parse_inline(), 783 "LIKE": lambda self: self._parse_create_like(), 784 "NOT": lambda self: self._parse_not_constraint(), 785 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 786 "ON": lambda self: ( 787 self._match(TokenType.UPDATE) 788 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 789 ) 790 or self.expression(exp.OnProperty, this=self._parse_id_var()), 791 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 792 "PERIOD": lambda self: self._parse_period_for_system_time(), 793 "PRIMARY KEY": lambda self: self._parse_primary_key(), 794 "REFERENCES": lambda self: self._parse_references(match=False), 795 "TITLE": lambda self: self.expression( 796 exp.TitleColumnConstraint, this=self._parse_var_or_string() 797 ), 798 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 799 "UNIQUE": lambda self: self._parse_unique(), 800 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 801 "WITH": lambda self: self.expression( 802 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 803 ), 804 } 805 806 ALTER_PARSERS = { 807 "ADD": lambda self: self._parse_alter_table_add(), 808 "ALTER": lambda self: self._parse_alter_table_alter(), 809 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 810 "DROP": lambda self: self._parse_alter_table_drop(), 811 "RENAME": lambda self: self._parse_alter_table_rename(), 812 } 813 814 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 815 816 NO_PAREN_FUNCTION_PARSERS = { 817 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 818 "CASE": lambda self: self._parse_case(), 819 "IF": lambda self: self._parse_if(), 820 "NEXT": lambda self: self._parse_next_value_for(), 821 } 822 823 INVALID_FUNC_NAME_TOKENS = { 824 TokenType.IDENTIFIER, 825 TokenType.STRING, 826 } 
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function names that bypass the generic argument-list parsing and instead
    # dispatch to a dedicated _parse_* helper.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Maps the token that introduces a query modifier (WHERE, GROUP BY, LIMIT, ...)
    # to a callable producing a (modifier_key, parsed_expression) pair.
    # Note FETCH shares the "limit" slot with LIMIT, and FOR/LOCK both feed "locks".
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects add entries, and the _Parser metaclass builds
    # SHOW_TRIE from the keys.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Callables that turn a literal of a given DataType.Type into an expression
    # (currently only JSON -> ParseJSON).
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR ...
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # When True, casts parse to exp.Cast; otherwise exp.TryCast
    # (see COLUMN_OPERATORS' DCOLON entry and FUNCTION_PARSERS' CAST/CONVERT).
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether single-argument LOG parses as LN (consumed via parse_logarithm).
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False
"=") for assignments 945 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 946 947 # Whether the TRIM function expects the characters to trim as its first argument 948 TRIM_PATTERN_FIRST = False 949 950 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 951 MODIFIERS_ATTACHED_TO_UNION = True 952 UNION_MODIFIERS = {"order", "limit", "offset"} 953 954 __slots__ = ( 955 "error_level", 956 "error_message_context", 957 "max_errors", 958 "dialect", 959 "sql", 960 "errors", 961 "_tokens", 962 "_index", 963 "_curr", 964 "_next", 965 "_prev", 966 "_prev_comments", 967 ) 968 969 # Autofilled 970 SHOW_TRIE: t.Dict = {} 971 SET_TRIE: t.Dict = {} 972 973 def __init__( 974 self, 975 error_level: t.Optional[ErrorLevel] = None, 976 error_message_context: int = 100, 977 max_errors: int = 3, 978 dialect: DialectType = None, 979 ): 980 from sqlglot.dialects import Dialect 981 982 self.error_level = error_level or ErrorLevel.IMMEDIATE 983 self.error_message_context = error_message_context 984 self.max_errors = max_errors 985 self.dialect = Dialect.get_or_raise(dialect) 986 self.reset() 987 988 def reset(self): 989 self.sql = "" 990 self.errors = [] 991 self._tokens = [] 992 self._index = 0 993 self._curr = None 994 self._next = None 995 self._prev = None 996 self._prev_comments = None 997 998 def parse( 999 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1000 ) -> t.List[t.Optional[exp.Expression]]: 1001 """ 1002 Parses a list of tokens and returns a list of syntax trees, one tree 1003 per parsed SQL statement. 1004 1005 Args: 1006 raw_tokens: The list of tokens. 1007 sql: The original SQL string, used to produce helpful debug messages. 1008 1009 Returns: 1010 The list of the produced syntax trees. 
1011 """ 1012 return self._parse( 1013 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1014 ) 1015 1016 def parse_into( 1017 self, 1018 expression_types: exp.IntoType, 1019 raw_tokens: t.List[Token], 1020 sql: t.Optional[str] = None, 1021 ) -> t.List[t.Optional[exp.Expression]]: 1022 """ 1023 Parses a list of tokens into a given Expression type. If a collection of Expression 1024 types is given instead, this method will try to parse the token list into each one 1025 of them, stopping at the first for which the parsing succeeds. 1026 1027 Args: 1028 expression_types: The expression type(s) to try and parse the token list into. 1029 raw_tokens: The list of tokens. 1030 sql: The original SQL string, used to produce helpful debug messages. 1031 1032 Returns: 1033 The target Expression. 1034 """ 1035 errors = [] 1036 for expression_type in ensure_list(expression_types): 1037 parser = self.EXPRESSION_PARSERS.get(expression_type) 1038 if not parser: 1039 raise TypeError(f"No parser registered for {expression_type}") 1040 1041 try: 1042 return self._parse(parser, raw_tokens, sql) 1043 except ParseError as e: 1044 e.errors[0]["into_expression"] = expression_type 1045 errors.append(e) 1046 1047 raise ParseError( 1048 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1049 errors=merge_errors(errors), 1050 ) from errors[-1] 1051 1052 def _parse( 1053 self, 1054 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1055 raw_tokens: t.List[Token], 1056 sql: t.Optional[str] = None, 1057 ) -> t.List[t.Optional[exp.Expression]]: 1058 self.reset() 1059 self.sql = sql or "" 1060 1061 total = len(raw_tokens) 1062 chunks: t.List[t.List[Token]] = [[]] 1063 1064 for i, token in enumerate(raw_tokens): 1065 if token.token_type == TokenType.SEMICOLON: 1066 if i < total - 1: 1067 chunks.append([]) 1068 else: 1069 chunks[-1].append(token) 1070 1071 expressions = [] 1072 1073 for tokens in chunks: 1074 self._index = -1 1075 self._tokens = 
tokens 1076 self._advance() 1077 1078 expressions.append(parse_method(self)) 1079 1080 if self._index < len(self._tokens): 1081 self.raise_error("Invalid expression / Unexpected token") 1082 1083 self.check_errors() 1084 1085 return expressions 1086 1087 def check_errors(self) -> None: 1088 """Logs or raises any found errors, depending on the chosen error level setting.""" 1089 if self.error_level == ErrorLevel.WARN: 1090 for error in self.errors: 1091 logger.error(str(error)) 1092 elif self.error_level == ErrorLevel.RAISE and self.errors: 1093 raise ParseError( 1094 concat_messages(self.errors, self.max_errors), 1095 errors=merge_errors(self.errors), 1096 ) 1097 1098 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1099 """ 1100 Appends an error in the list of recorded errors or raises it, depending on the chosen 1101 error level setting. 1102 """ 1103 token = token or self._curr or self._prev or Token.string("") 1104 start = token.start 1105 end = token.end + 1 1106 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1107 highlight = self.sql[start:end] 1108 end_context = self.sql[end : end + self.error_message_context] 1109 1110 error = ParseError.new( 1111 f"{message}. Line {token.line}, Col: {token.col}.\n" 1112 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1113 description=message, 1114 line=token.line, 1115 col=token.col, 1116 start_context=start_context, 1117 highlight=highlight, 1118 end_context=end_context, 1119 ) 1120 1121 if self.error_level == ErrorLevel.IMMEDIATE: 1122 raise error 1123 1124 self.errors.append(error) 1125 1126 def expression( 1127 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1128 ) -> E: 1129 """ 1130 Creates a new, validated Expression. 1131 1132 Args: 1133 exp_class: The expression class to instantiate. 1134 comments: An optional list of comments to attach to the expression. 
1135 kwargs: The arguments to set for the expression along with their respective values. 1136 1137 Returns: 1138 The target expression. 1139 """ 1140 instance = exp_class(**kwargs) 1141 instance.add_comments(comments) if comments else self._add_comments(instance) 1142 return self.validate_expression(instance) 1143 1144 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1145 if expression and self._prev_comments: 1146 expression.add_comments(self._prev_comments) 1147 self._prev_comments = None 1148 1149 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1150 """ 1151 Validates an Expression, making sure that all its mandatory arguments are set. 1152 1153 Args: 1154 expression: The expression to validate. 1155 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1156 1157 Returns: 1158 The validated expression. 1159 """ 1160 if self.error_level != ErrorLevel.IGNORE: 1161 for error_message in expression.error_messages(args): 1162 self.raise_error(error_message) 1163 1164 return expression 1165 1166 def _find_sql(self, start: Token, end: Token) -> str: 1167 return self.sql[start.start : end.end + 1] 1168 1169 def _is_connected(self) -> bool: 1170 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1171 1172 def _advance(self, times: int = 1) -> None: 1173 self._index += times 1174 self._curr = seq_get(self._tokens, self._index) 1175 self._next = seq_get(self._tokens, self._index + 1) 1176 1177 if self._index > 0: 1178 self._prev = self._tokens[self._index - 1] 1179 self._prev_comments = self._prev.comments 1180 else: 1181 self._prev = None 1182 self._prev_comments = None 1183 1184 def _retreat(self, index: int) -> None: 1185 if index != self._index: 1186 self._advance(index - self._index) 1187 1188 def _parse_command(self) -> exp.Command: 1189 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1190 1191 def 
_parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1192 start = self._prev 1193 exists = self._parse_exists() if allow_exists else None 1194 1195 self._match(TokenType.ON) 1196 1197 kind = self._match_set(self.CREATABLES) and self._prev 1198 if not kind: 1199 return self._parse_as_command(start) 1200 1201 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1202 this = self._parse_user_defined_function(kind=kind.token_type) 1203 elif kind.token_type == TokenType.TABLE: 1204 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1205 elif kind.token_type == TokenType.COLUMN: 1206 this = self._parse_column() 1207 else: 1208 this = self._parse_id_var() 1209 1210 self._match(TokenType.IS) 1211 1212 return self.expression( 1213 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1214 ) 1215 1216 def _parse_to_table( 1217 self, 1218 ) -> exp.ToTableProperty: 1219 table = self._parse_table_parts(schema=True) 1220 return self.expression(exp.ToTableProperty, this=table) 1221 1222 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1223 def _parse_ttl(self) -> exp.Expression: 1224 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1225 this = self._parse_bitwise() 1226 1227 if self._match_text_seq("DELETE"): 1228 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1229 if self._match_text_seq("RECOMPRESS"): 1230 return self.expression( 1231 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1232 ) 1233 if self._match_text_seq("TO", "DISK"): 1234 return self.expression( 1235 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1236 ) 1237 if self._match_text_seq("TO", "VOLUME"): 1238 return self.expression( 1239 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1240 ) 1241 1242 return this 1243 1244 expressions = self._parse_csv(_parse_ttl_action) 1245 where = self._parse_where() 
1246 group = self._parse_group() 1247 1248 aggregates = None 1249 if group and self._match(TokenType.SET): 1250 aggregates = self._parse_csv(self._parse_set_item) 1251 1252 return self.expression( 1253 exp.MergeTreeTTL, 1254 expressions=expressions, 1255 where=where, 1256 group=group, 1257 aggregates=aggregates, 1258 ) 1259 1260 def _parse_statement(self) -> t.Optional[exp.Expression]: 1261 if self._curr is None: 1262 return None 1263 1264 if self._match_set(self.STATEMENT_PARSERS): 1265 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1266 1267 if self._match_set(Tokenizer.COMMANDS): 1268 return self._parse_command() 1269 1270 expression = self._parse_expression() 1271 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1272 return self._parse_query_modifiers(expression) 1273 1274 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1275 start = self._prev 1276 temporary = self._match(TokenType.TEMPORARY) 1277 materialized = self._match_text_seq("MATERIALIZED") 1278 1279 kind = self._match_set(self.CREATABLES) and self._prev.text 1280 if not kind: 1281 return self._parse_as_command(start) 1282 1283 return self.expression( 1284 exp.Drop, 1285 comments=start.comments, 1286 exists=exists or self._parse_exists(), 1287 this=self._parse_table(schema=True), 1288 kind=kind, 1289 temporary=temporary, 1290 materialized=materialized, 1291 cascade=self._match_text_seq("CASCADE"), 1292 constraints=self._match_text_seq("CONSTRAINTS"), 1293 purge=self._match_text_seq("PURGE"), 1294 ) 1295 1296 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1297 return ( 1298 self._match_text_seq("IF") 1299 and (not not_ or self._match(TokenType.NOT)) 1300 and self._match(TokenType.EXISTS) 1301 ) 1302 1303 def _parse_create(self) -> exp.Create | exp.Command: 1304 # Note: this can't be None because we've matched a statement parser 1305 start = self._prev 1306 comments = self._prev_comments 1307 1308 replace = 
start.text.upper() == "REPLACE" or self._match_pair( 1309 TokenType.OR, TokenType.REPLACE 1310 ) 1311 unique = self._match(TokenType.UNIQUE) 1312 1313 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1314 self._advance() 1315 1316 properties = None 1317 create_token = self._match_set(self.CREATABLES) and self._prev 1318 1319 if not create_token: 1320 # exp.Properties.Location.POST_CREATE 1321 properties = self._parse_properties() 1322 create_token = self._match_set(self.CREATABLES) and self._prev 1323 1324 if not properties or not create_token: 1325 return self._parse_as_command(start) 1326 1327 exists = self._parse_exists(not_=True) 1328 this = None 1329 expression: t.Optional[exp.Expression] = None 1330 indexes = None 1331 no_schema_binding = None 1332 begin = None 1333 end = None 1334 clone = None 1335 1336 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1337 nonlocal properties 1338 if properties and temp_props: 1339 properties.expressions.extend(temp_props.expressions) 1340 elif temp_props: 1341 properties = temp_props 1342 1343 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1344 this = self._parse_user_defined_function(kind=create_token.token_type) 1345 1346 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1347 extend_props(self._parse_properties()) 1348 1349 self._match(TokenType.ALIAS) 1350 1351 if self._match(TokenType.COMMAND): 1352 expression = self._parse_as_command(self._prev) 1353 else: 1354 begin = self._match(TokenType.BEGIN) 1355 return_ = self._match_text_seq("RETURN") 1356 1357 if self._match(TokenType.STRING, advance=False): 1358 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1359 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1360 expression = self._parse_string() 1361 extend_props(self._parse_properties()) 1362 else: 1363 expression = 
self._parse_statement() 1364 1365 end = self._match_text_seq("END") 1366 1367 if return_: 1368 expression = self.expression(exp.Return, this=expression) 1369 elif create_token.token_type == TokenType.INDEX: 1370 this = self._parse_index(index=self._parse_id_var()) 1371 elif create_token.token_type in self.DB_CREATABLES: 1372 table_parts = self._parse_table_parts(schema=True) 1373 1374 # exp.Properties.Location.POST_NAME 1375 self._match(TokenType.COMMA) 1376 extend_props(self._parse_properties(before=True)) 1377 1378 this = self._parse_schema(this=table_parts) 1379 1380 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1381 extend_props(self._parse_properties()) 1382 1383 self._match(TokenType.ALIAS) 1384 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1385 # exp.Properties.Location.POST_ALIAS 1386 extend_props(self._parse_properties()) 1387 1388 expression = self._parse_ddl_select() 1389 1390 if create_token.token_type == TokenType.TABLE: 1391 # exp.Properties.Location.POST_EXPRESSION 1392 extend_props(self._parse_properties()) 1393 1394 indexes = [] 1395 while True: 1396 index = self._parse_index() 1397 1398 # exp.Properties.Location.POST_INDEX 1399 extend_props(self._parse_properties()) 1400 1401 if not index: 1402 break 1403 else: 1404 self._match(TokenType.COMMA) 1405 indexes.append(index) 1406 elif create_token.token_type == TokenType.VIEW: 1407 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1408 no_schema_binding = True 1409 1410 shallow = self._match_text_seq("SHALLOW") 1411 1412 if self._match_texts(self.CLONE_KEYWORDS): 1413 copy = self._prev.text.lower() == "copy" 1414 clone = self.expression( 1415 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1416 ) 1417 1418 return self.expression( 1419 exp.Create, 1420 comments=comments, 1421 this=this, 1422 kind=create_token.text, 1423 replace=replace, 1424 unique=unique, 1425 expression=expression, 1426 exists=exists, 1427 properties=properties, 1428 
indexes=indexes, 1429 no_schema_binding=no_schema_binding, 1430 begin=begin, 1431 end=end, 1432 clone=clone, 1433 ) 1434 1435 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1436 # only used for teradata currently 1437 self._match(TokenType.COMMA) 1438 1439 kwargs = { 1440 "no": self._match_text_seq("NO"), 1441 "dual": self._match_text_seq("DUAL"), 1442 "before": self._match_text_seq("BEFORE"), 1443 "default": self._match_text_seq("DEFAULT"), 1444 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1445 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1446 "after": self._match_text_seq("AFTER"), 1447 "minimum": self._match_texts(("MIN", "MINIMUM")), 1448 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1449 } 1450 1451 if self._match_texts(self.PROPERTY_PARSERS): 1452 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1453 try: 1454 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1455 except TypeError: 1456 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1457 1458 return None 1459 1460 def _parse_property(self) -> t.Optional[exp.Expression]: 1461 if self._match_texts(self.PROPERTY_PARSERS): 1462 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1463 1464 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1465 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1466 1467 if self._match_text_seq("COMPOUND", "SORTKEY"): 1468 return self._parse_sortkey(compound=True) 1469 1470 if self._match_text_seq("SQL", "SECURITY"): 1471 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1472 1473 index = self._index 1474 key = self._parse_column() 1475 1476 if not self._match(TokenType.EQ): 1477 self._retreat(index) 1478 return None 1479 1480 return self.expression( 1481 exp.Property, 1482 this=key.to_dot() if isinstance(key, exp.Column) else key, 1483 value=self._parse_column() or self._parse_var(any_token=True), 1484 ) 
    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS, including Hive's INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses `[= | AS] <field>` into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """
        Parses consecutive properties into a single Properties node.

        Args:
            before: When truthy, uses the pre-name (Teradata-style) property parser.

        Returns:
            A Properties node, or None when no property matched.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """
        Disambiguates VOLATILE: after CREATE/REPLACE/UNIQUE it's a table property,
        otherwise it's a function stability marker.
        """
        if self._index >= 2:
            # Look two tokens back, past the VOLATILE keyword itself
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING [= ON] [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the various forms that can follow WITH: (props), JOURNAL, [NO] DATA, etc."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host (MySQL view/routine definer)."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps the pre-matched journal modifiers into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a CLUSTER BY expression list."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS (Hive)."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses COPY GRANTS; rewinds past COPY when GRANTS doesn't follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, either with an explicit `= <n> [PERCENT]` or bare."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <size> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: kind, target, FOR/IN, lock type, and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parses PARTITION BY <exprs>, returning an empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parses a Postgres partition bound: IN (...), FROM ... TO ..., or WITH (MODULUS, REMAINDER)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parses PARTITION OF <parent> {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            # Not PARTITION OF: rewind past the token we consumed to get here
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses PARTITIONED BY, accepting either a schema or a bracketed field list."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses the NO PRIMARY INDEX property; returns None for other NO-prefixed text."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None
1849 1850 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1851 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1852 return exp.OnCommitProperty() 1853 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1854 return exp.OnCommitProperty(delete=True) 1855 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1856 1857 def _parse_distkey(self) -> exp.DistKeyProperty: 1858 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1859 1860 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1861 table = self._parse_table(schema=True) 1862 1863 options = [] 1864 while self._match_texts(("INCLUDING", "EXCLUDING")): 1865 this = self._prev.text.upper() 1866 1867 id_var = self._parse_id_var() 1868 if not id_var: 1869 return None 1870 1871 options.append( 1872 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1873 ) 1874 1875 return self.expression(exp.LikeProperty, this=table, expressions=options) 1876 1877 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1878 return self.expression( 1879 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1880 ) 1881 1882 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1883 self._match(TokenType.EQ) 1884 return self.expression( 1885 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1886 ) 1887 1888 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1889 self._match_text_seq("WITH", "CONNECTION") 1890 return self.expression( 1891 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1892 ) 1893 1894 def _parse_returns(self) -> exp.ReturnsProperty: 1895 value: t.Optional[exp.Expression] 1896 is_table = self._match(TokenType.TABLE) 1897 1898 if is_table: 1899 if self._match(TokenType.LT): 1900 value = self.expression( 1901 exp.Schema, 1902 this="TABLE", 1903 
expressions=self._parse_csv(self._parse_struct_types), 1904 ) 1905 if not self._match(TokenType.GT): 1906 self.raise_error("Expecting >") 1907 else: 1908 value = self._parse_schema(exp.var("TABLE")) 1909 else: 1910 value = self._parse_types() 1911 1912 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1913 1914 def _parse_describe(self) -> exp.Describe: 1915 kind = self._match_set(self.CREATABLES) and self._prev.text 1916 this = self._parse_table(schema=True) 1917 properties = self._parse_properties() 1918 expressions = properties.expressions if properties else None 1919 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1920 1921 def _parse_insert(self) -> exp.Insert: 1922 comments = ensure_list(self._prev_comments) 1923 overwrite = self._match(TokenType.OVERWRITE) 1924 ignore = self._match(TokenType.IGNORE) 1925 local = self._match_text_seq("LOCAL") 1926 alternative = None 1927 1928 if self._match_text_seq("DIRECTORY"): 1929 this: t.Optional[exp.Expression] = self.expression( 1930 exp.Directory, 1931 this=self._parse_var_or_string(), 1932 local=local, 1933 row_format=self._parse_row_format(match_row=True), 1934 ) 1935 else: 1936 if self._match(TokenType.OR): 1937 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1938 1939 self._match(TokenType.INTO) 1940 comments += ensure_list(self._prev_comments) 1941 self._match(TokenType.TABLE) 1942 this = self._parse_table(schema=True) 1943 1944 returning = self._parse_returning() 1945 1946 return self.expression( 1947 exp.Insert, 1948 comments=comments, 1949 this=this, 1950 by_name=self._match_text_seq("BY", "NAME"), 1951 exists=self._parse_exists(), 1952 partition=self._parse_partition(), 1953 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1954 and self._parse_conjunction(), 1955 expression=self._parse_ddl_select(), 1956 conflict=self._parse_on_conflict(), 1957 returning=returning or self._parse_returning(), 1958 
    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) / ON DUPLICATE KEY (MySQL) clauses of INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Entry point when the ROW token was already consumed; expects FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' or DELIMITED [options]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid directly after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; anything else becomes a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET list, FROM/WHERE/RETURNING/ORDER/LIMIT."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr>, ...)."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row, with or without surrounding parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        # Hook for dialects that need custom projection handling.
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH/SELECT/parenthesized subquery/VALUES/leading FROM."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be a no-op depending on the error level,
                # so fall back to returning the bare CTE.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] <cte> [, <cte>]*."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs after the comma.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)]; returns None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Empty parens weren't a column list; rewind past the L_PAREN.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in a Subquery node, attaching pivots and an optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, WHERE/GROUP/ORDER/LIMIT, ...) to a query."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y style: hoist the embedded offset onto the query.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block /*+ ... */ following SELECT."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause (optionally assuming FROM was already consumed)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause (row-pattern recognition)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL between balanced parentheses.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: UNNEST, a function call, or a dotted identifier chain.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN clause (including comma joins and APPLY variants)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The prefix tokens didn't lead to JOIN; undo the lookahead.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Nested-join syntax: the ON/USING may belong to an inner join.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression with an optional Postgres operator class suffix."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body; when `index` is given, only the ON <table> part remains."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH(...) table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table reference: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema) or ""

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, values, subquery, or a named table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects (e.g. Hive) put the alias before TABLESAMPLE.
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table clauses: FOR SYSTEM_TIME/VERSION AS OF, BETWEEN, CONTAINED IN, ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS x]]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # e.g. BigQuery: the alias names the produced column, not the table.
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias actually names the ordinality column.
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
2844 if not is_derived and not self._match(TokenType.VALUES): 2845 return None 2846 2847 expressions = self._parse_csv(self._parse_value) 2848 alias = self._parse_table_alias() 2849 2850 if is_derived: 2851 self._match_r_paren() 2852 2853 return self.expression( 2854 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2855 ) 2856 2857 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2858 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2859 as_modifier and self._match_text_seq("USING", "SAMPLE") 2860 ): 2861 return None 2862 2863 bucket_numerator = None 2864 bucket_denominator = None 2865 bucket_field = None 2866 percent = None 2867 rows = None 2868 size = None 2869 seed = None 2870 2871 kind = ( 2872 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2873 ) 2874 method = self._parse_var(tokens=(TokenType.ROW,)) 2875 2876 matched_l_paren = self._match(TokenType.L_PAREN) 2877 2878 if self.TABLESAMPLE_CSV: 2879 num = None 2880 expressions = self._parse_csv(self._parse_primary) 2881 else: 2882 expressions = None 2883 num = ( 2884 self._parse_factor() 2885 if self._match(TokenType.NUMBER, advance=False) 2886 else self._parse_primary() or self._parse_placeholder() 2887 ) 2888 2889 if self._match_text_seq("BUCKET"): 2890 bucket_numerator = self._parse_number() 2891 self._match_text_seq("OUT", "OF") 2892 bucket_denominator = bucket_denominator = self._parse_number() 2893 self._match(TokenType.ON) 2894 bucket_field = self._parse_field() 2895 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2896 percent = num 2897 elif self._match(TokenType.ROWS): 2898 rows = num 2899 elif num: 2900 size = num 2901 2902 if matched_l_paren: 2903 self._match_r_paren() 2904 2905 if self._match(TokenType.L_PAREN): 2906 method = self._parse_var() 2907 seed = self._match(TokenType.COMMA) and self._parse_number() 2908 self._match_r_paren() 2909 elif self._match_texts(("SEED", 
"REPEATABLE")): 2910 seed = self._parse_wrapped(self._parse_number) 2911 2912 return self.expression( 2913 exp.TableSample, 2914 expressions=expressions, 2915 method=method, 2916 bucket_numerator=bucket_numerator, 2917 bucket_denominator=bucket_denominator, 2918 bucket_field=bucket_field, 2919 percent=percent, 2920 rows=rows, 2921 size=size, 2922 seed=seed, 2923 kind=kind, 2924 ) 2925 2926 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2927 return list(iter(self._parse_pivot, None)) or None 2928 2929 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2930 return list(iter(self._parse_join, None)) or None 2931 2932 # https://duckdb.org/docs/sql/statements/pivot 2933 def _parse_simplified_pivot(self) -> exp.Pivot: 2934 def _parse_on() -> t.Optional[exp.Expression]: 2935 this = self._parse_bitwise() 2936 return self._parse_in(this) if self._match(TokenType.IN) else this 2937 2938 this = self._parse_table() 2939 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2940 using = self._match(TokenType.USING) and self._parse_csv( 2941 lambda: self._parse_alias(self._parse_function()) 2942 ) 2943 group = self._parse_group() 2944 return self.expression( 2945 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2946 ) 2947 2948 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2949 index = self._index 2950 include_nulls = None 2951 2952 if self._match(TokenType.PIVOT): 2953 unpivot = False 2954 elif self._match(TokenType.UNPIVOT): 2955 unpivot = True 2956 2957 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2958 if self._match_text_seq("INCLUDE", "NULLS"): 2959 include_nulls = True 2960 elif self._match_text_seq("EXCLUDE", "NULLS"): 2961 include_nulls = False 2962 else: 2963 return None 2964 2965 expressions = [] 2966 field = None 2967 2968 if not self._match(TokenType.L_PAREN): 2969 self._retreat(index) 2970 return None 2971 2972 if unpivot: 2973 expressions = 
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the name each PIVOT aggregation contributes to generated column names."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause (optionally assuming WHERE was already consumed)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP(...) does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH wasn't part of GROUP BY after all; rewind.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS ((...), ...)."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause (optionally assuming HAVING was already consumed)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())
-> t.Optional[exp.Qualify]: 3095 if not self._match(TokenType.QUALIFY): 3096 return None 3097 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3098 3099 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3100 if skip_start_token: 3101 start = None 3102 elif self._match(TokenType.START_WITH): 3103 start = self._parse_conjunction() 3104 else: 3105 return None 3106 3107 self._match(TokenType.CONNECT_BY) 3108 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3109 exp.Prior, this=self._parse_bitwise() 3110 ) 3111 connect = self._parse_conjunction() 3112 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3113 3114 if not start and self._match(TokenType.START_WITH): 3115 start = self._parse_conjunction() 3116 3117 return self.expression(exp.Connect, start=start, connect=connect) 3118 3119 def _parse_order( 3120 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3121 ) -> t.Optional[exp.Expression]: 3122 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3123 return this 3124 3125 return self.expression( 3126 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3127 ) 3128 3129 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3130 if not self._match(token): 3131 return None 3132 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3133 3134 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3135 this = parse_method() if parse_method else self._parse_conjunction() 3136 3137 asc = self._match(TokenType.ASC) 3138 desc = self._match(TokenType.DESC) or (asc and False) 3139 3140 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3141 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3142 3143 nulls_first = is_nulls_first or False 3144 explicitly_null_ordered = is_nulls_first or is_nulls_last 3145 3146 if ( 3147 not explicitly_null_ordered 3148 and ( 
3149 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3150 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3151 ) 3152 and self.dialect.NULL_ORDERING != "nulls_are_last" 3153 ): 3154 nulls_first = True 3155 3156 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3157 3158 def _parse_limit( 3159 self, this: t.Optional[exp.Expression] = None, top: bool = False 3160 ) -> t.Optional[exp.Expression]: 3161 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3162 comments = self._prev_comments 3163 if top: 3164 limit_paren = self._match(TokenType.L_PAREN) 3165 expression = self._parse_term() if limit_paren else self._parse_number() 3166 3167 if limit_paren: 3168 self._match_r_paren() 3169 else: 3170 expression = self._parse_term() 3171 3172 if self._match(TokenType.COMMA): 3173 offset = expression 3174 expression = self._parse_term() 3175 else: 3176 offset = None 3177 3178 limit_exp = self.expression( 3179 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3180 ) 3181 3182 return limit_exp 3183 3184 if self._match(TokenType.FETCH): 3185 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3186 direction = self._prev.text if direction else "FIRST" 3187 3188 count = self._parse_field(tokens=self.FETCH_TOKENS) 3189 percent = self._match(TokenType.PERCENT) 3190 3191 self._match_set((TokenType.ROW, TokenType.ROWS)) 3192 3193 only = self._match_text_seq("ONLY") 3194 with_ties = self._match_text_seq("WITH", "TIES") 3195 3196 if only and with_ties: 3197 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3198 3199 return self.expression( 3200 exp.Fetch, 3201 direction=direction, 3202 count=count, 3203 percent=percent, 3204 with_ties=with_ties, 3205 ) 3206 3207 return this 3208 3209 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3210 if not self._match(TokenType.OFFSET): 3211 return this 3212 3213 count = 
self._parse_term() 3214 self._match_set((TokenType.ROW, TokenType.ROWS)) 3215 return self.expression(exp.Offset, this=this, expression=count) 3216 3217 def _parse_locks(self) -> t.List[exp.Lock]: 3218 locks = [] 3219 while True: 3220 if self._match_text_seq("FOR", "UPDATE"): 3221 update = True 3222 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3223 "LOCK", "IN", "SHARE", "MODE" 3224 ): 3225 update = False 3226 else: 3227 break 3228 3229 expressions = None 3230 if self._match_text_seq("OF"): 3231 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3232 3233 wait: t.Optional[bool | exp.Expression] = None 3234 if self._match_text_seq("NOWAIT"): 3235 wait = True 3236 elif self._match_text_seq("WAIT"): 3237 wait = self._parse_primary() 3238 elif self._match_text_seq("SKIP", "LOCKED"): 3239 wait = False 3240 3241 locks.append( 3242 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3243 ) 3244 3245 return locks 3246 3247 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3248 while this and self._match_set(self.SET_OPERATIONS): 3249 token_type = self._prev.token_type 3250 3251 if token_type == TokenType.UNION: 3252 operation = exp.Union 3253 elif token_type == TokenType.EXCEPT: 3254 operation = exp.Except 3255 else: 3256 operation = exp.Intersect 3257 3258 comments = self._prev.comments 3259 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3260 by_name = self._match_text_seq("BY", "NAME") 3261 expression = self._parse_select(nested=True, parse_set_operation=False) 3262 3263 this = self.expression( 3264 operation, 3265 comments=comments, 3266 this=this, 3267 distinct=distinct, 3268 by_name=by_name, 3269 expression=expression, 3270 ) 3271 3272 if this and self.MODIFIERS_ATTACHED_TO_UNION: 3273 expression = this.expression 3274 3275 if expression: 3276 for arg in self.UNION_MODIFIERS: 3277 expr = expression.args.get(arg) 3278 if expr: 
3279 this.set(arg, expr.pop()) 3280 3281 return this 3282 3283 def _parse_expression(self) -> t.Optional[exp.Expression]: 3284 return self._parse_alias(self._parse_conjunction()) 3285 3286 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3287 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3288 3289 def _parse_equality(self) -> t.Optional[exp.Expression]: 3290 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3291 3292 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3293 return self._parse_tokens(self._parse_range, self.COMPARISON) 3294 3295 def _parse_range(self) -> t.Optional[exp.Expression]: 3296 this = self._parse_bitwise() 3297 negate = self._match(TokenType.NOT) 3298 3299 if self._match_set(self.RANGE_PARSERS): 3300 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3301 if not expression: 3302 return this 3303 3304 this = expression 3305 elif self._match(TokenType.ISNULL): 3306 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3307 3308 # Postgres supports ISNULL and NOTNULL for conditions. 
3309 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3310 if self._match(TokenType.NOTNULL): 3311 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3312 this = self.expression(exp.Not, this=this) 3313 3314 if negate: 3315 this = self.expression(exp.Not, this=this) 3316 3317 if self._match(TokenType.IS): 3318 this = self._parse_is(this) 3319 3320 return this 3321 3322 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3323 index = self._index - 1 3324 negate = self._match(TokenType.NOT) 3325 3326 if self._match_text_seq("DISTINCT", "FROM"): 3327 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3328 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3329 3330 expression = self._parse_null() or self._parse_boolean() 3331 if not expression: 3332 self._retreat(index) 3333 return None 3334 3335 this = self.expression(exp.Is, this=this, expression=expression) 3336 return self.expression(exp.Not, this=this) if negate else this 3337 3338 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3339 unnest = self._parse_unnest(with_alias=False) 3340 if unnest: 3341 this = self.expression(exp.In, this=this, unnest=unnest) 3342 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3343 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3344 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3345 3346 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3347 this = self.expression(exp.In, this=this, query=expressions[0]) 3348 else: 3349 this = self.expression(exp.In, this=this, expressions=expressions) 3350 3351 if matched_l_paren: 3352 self._match_r_paren(this) 3353 elif not self._match(TokenType.R_BRACKET, expression=this): 3354 self.raise_error("Expecting ]") 3355 else: 3356 this = self.expression(exp.In, this=this, field=self._parse_field()) 3357 3358 return this 3359 3360 def 
_parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse the tail of BETWEEN: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE '<char>' clause if one follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing the value/unit split."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            # INTERVAL with no value — not an interval literal; rewind.
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split "<value> <unit>" out of the single string literal.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, || (dialect-dependent concat), ?? coalesce,
        and << / >> shifts (matched as LT LT / GT GT token pairs)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-precedence operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-precedence operators, tagging divisions with the
        dialect's typed/safe division semantics."""
        if self.EXPONENT:
            factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
        else:
            factor = self._parse_tokens(self._parse_unary, self.FACTOR)
        if isinstance(factor, exp.Div):
            factor.args["typed"] = self.dialect.TYPED_DIVISION
            factor.args["safe"] = self.dialect.SAFE_DIVISION
        return factor

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators (only used when the dialect defines EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then a typed expression with optional AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse either an interval, a cast written as `<type> <literal>`, or a column
        expression; retreats when a bare type name wasn't actually a cast."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `<type> '<literal>'` is a cast, e.g. DATE '2020-01-01'.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Plain type name with no args and no literal — reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data-type parameter, e.g. the `10` in DECIMAL(10), with an
        optional trailing keyword (e.g. VARCHAR(10 CHAR))."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>), struct, enum, parameterized,
        timezone-qualified, INTERVAL span, unsigned and user-defined types.

        check_func=True makes a parenthesized form like FOO('x') count as a possible
        function call rather than a type, unless a string literal follows.
        """
        index = self._index

        # Teradata can prefix types with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Maybe an identifier that tokenizes to a type (quoted type name), or a UDT.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Malformed parameter list — this wasn't a type after all.
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax: ARRAY<INT>, STRUCT<a INT, ...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL YEAR TO MONTH style span.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No following string literal: treat TYPE(...) as a function call instead.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Postgres-style array suffixes: INT[][] etc.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct member (name [:] type), as a column definition."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

3640 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3641 if not self._match_text_seq("AT", "TIME", "ZONE"): 3642 return this 3643 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3644 3645 def _parse_column(self) -> t.Optional[exp.Expression]: 3646 this = self._parse_field() 3647 if isinstance(this, exp.Identifier): 3648 this = self.expression(exp.Column, this=this) 3649 elif not this: 3650 return self._parse_bracket(this) 3651 return self._parse_column_ops(this) 3652 3653 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3654 this = self._parse_bracket(this) 3655 3656 while self._match_set(self.COLUMN_OPERATORS): 3657 op_token = self._prev.token_type 3658 op = self.COLUMN_OPERATORS.get(op_token) 3659 3660 if op_token == TokenType.DCOLON: 3661 field = self._parse_types() 3662 if not field: 3663 self.raise_error("Expected type") 3664 elif op and self._curr: 3665 self._advance() 3666 value = self._prev.text 3667 field = ( 3668 exp.Literal.number(value) 3669 if self._prev.token_type == TokenType.NUMBER 3670 else exp.Literal.string(value) 3671 ) 3672 else: 3673 field = self._parse_field(anonymous_func=True, any_token=True) 3674 3675 if isinstance(field, exp.Func): 3676 # bigquery allows function calls like x.y.count(...) 3677 # SAFE.SUBSTR(...) 
3678 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3679 this = self._replace_columns_with_dots(this) 3680 3681 if op: 3682 this = op(self, this, field) 3683 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3684 this = self.expression( 3685 exp.Column, 3686 this=field, 3687 table=this.this, 3688 db=this.args.get("table"), 3689 catalog=this.args.get("db"), 3690 ) 3691 else: 3692 this = self.expression(exp.Dot, this=this, expression=field) 3693 this = self._parse_bracket(this) 3694 return this 3695 3696 def _parse_primary(self) -> t.Optional[exp.Expression]: 3697 if self._match_set(self.PRIMARY_PARSERS): 3698 token_type = self._prev.token_type 3699 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3700 3701 if token_type == TokenType.STRING: 3702 expressions = [primary] 3703 while self._match(TokenType.STRING): 3704 expressions.append(exp.Literal.string(self._prev.text)) 3705 3706 if len(expressions) > 1: 3707 return self.expression(exp.Concat, expressions=expressions) 3708 3709 return primary 3710 3711 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3712 return exp.Literal.number(f"0.{self._prev.text}") 3713 3714 if self._match(TokenType.L_PAREN): 3715 comments = self._prev_comments 3716 query = self._parse_select() 3717 3718 if query: 3719 expressions = [query] 3720 else: 3721 expressions = self._parse_expressions() 3722 3723 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3724 3725 if isinstance(this, exp.Subqueryable): 3726 this = self._parse_set_operations( 3727 self._parse_subquery(this=this, parse_alias=False) 3728 ) 3729 elif len(expressions) > 1: 3730 this = self.expression(exp.Tuple, expressions=expressions) 3731 else: 3732 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3733 3734 if this: 3735 this.add_comments(comments) 3736 3737 self._match_r_paren(expression=this) 3738 return this 3739 3740 return None 3741 3742 def 
_parse_field( 3743 self, 3744 any_token: bool = False, 3745 tokens: t.Optional[t.Collection[TokenType]] = None, 3746 anonymous_func: bool = False, 3747 ) -> t.Optional[exp.Expression]: 3748 return ( 3749 self._parse_primary() 3750 or self._parse_function(anonymous=anonymous_func) 3751 or self._parse_id_var(any_token=any_token, tokens=tokens) 3752 ) 3753 3754 def _parse_function( 3755 self, 3756 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3757 anonymous: bool = False, 3758 optional_parens: bool = True, 3759 ) -> t.Optional[exp.Expression]: 3760 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3761 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3762 fn_syntax = False 3763 if ( 3764 self._match(TokenType.L_BRACE, advance=False) 3765 and self._next 3766 and self._next.text.upper() == "FN" 3767 ): 3768 self._advance(2) 3769 fn_syntax = True 3770 3771 func = self._parse_function_call( 3772 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3773 ) 3774 3775 if fn_syntax: 3776 self._match(TokenType.R_BRACE) 3777 3778 return func 3779 3780 def _parse_function_call( 3781 self, 3782 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3783 anonymous: bool = False, 3784 optional_parens: bool = True, 3785 ) -> t.Optional[exp.Expression]: 3786 if not self._curr: 3787 return None 3788 3789 comments = self._curr.comments 3790 token_type = self._curr.token_type 3791 this = self._curr.text 3792 upper = this.upper() 3793 3794 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3795 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3796 self._advance() 3797 return parser(self) 3798 3799 if not self._next or self._next.token_type != TokenType.L_PAREN: 3800 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3801 self._advance() 3802 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3803 3804 return None 3805 3806 if token_type not in 
self.FUNC_TOKENS: 3807 return None 3808 3809 self._advance(2) 3810 3811 parser = self.FUNCTION_PARSERS.get(upper) 3812 if parser and not anonymous: 3813 this = parser(self) 3814 else: 3815 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3816 3817 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3818 this = self.expression(subquery_predicate, this=self._parse_select()) 3819 self._match_r_paren() 3820 return this 3821 3822 if functions is None: 3823 functions = self.FUNCTIONS 3824 3825 function = functions.get(upper) 3826 3827 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3828 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3829 3830 if function and not anonymous: 3831 if "dialect" in function.__code__.co_varnames: 3832 func = function(args, dialect=self.dialect) 3833 else: 3834 func = function(args) 3835 3836 func = self.validate_expression(func, args) 3837 if not self.dialect.NORMALIZE_FUNCTIONS: 3838 func.meta["name"] = this 3839 3840 this = func 3841 else: 3842 this = self.expression(exp.Anonymous, this=this, expressions=args) 3843 3844 if isinstance(this, exp.Expression): 3845 this.add_comments(comments) 3846 3847 self._match_r_paren(this) 3848 return self._parse_window(this) 3849 3850 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3851 return self._parse_column_def(self._parse_id_var()) 3852 3853 def _parse_user_defined_function( 3854 self, kind: t.Optional[TokenType] = None 3855 ) -> t.Optional[exp.Expression]: 3856 this = self._parse_id_var() 3857 3858 while self._match(TokenType.DOT): 3859 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3860 3861 if not self._match(TokenType.L_PAREN): 3862 return this 3863 3864 expressions = self._parse_csv(self._parse_function_parameter) 3865 self._match_r_paren() 3866 return self.expression( 3867 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3868 ) 3869 3870 def 
_parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3871 literal = self._parse_primary() 3872 if literal: 3873 return self.expression(exp.Introducer, this=token.text, expression=literal) 3874 3875 return self.expression(exp.Identifier, this=token.text) 3876 3877 def _parse_session_parameter(self) -> exp.SessionParameter: 3878 kind = None 3879 this = self._parse_id_var() or self._parse_primary() 3880 3881 if this and self._match(TokenType.DOT): 3882 kind = this.name 3883 this = self._parse_var() or self._parse_primary() 3884 3885 return self.expression(exp.SessionParameter, this=this, kind=kind) 3886 3887 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3888 index = self._index 3889 3890 if self._match(TokenType.L_PAREN): 3891 expressions = t.cast( 3892 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3893 ) 3894 3895 if not self._match(TokenType.R_PAREN): 3896 self._retreat(index) 3897 else: 3898 expressions = [self._parse_id_var()] 3899 3900 if self._match_set(self.LAMBDAS): 3901 return self.LAMBDAS[self._prev.token_type](self, expressions) 3902 3903 self._retreat(index) 3904 3905 this: t.Optional[exp.Expression] 3906 3907 if self._match(TokenType.DISTINCT): 3908 this = self.expression( 3909 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3910 ) 3911 else: 3912 this = self._parse_select_or_expression(alias=alias) 3913 3914 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3915 3916 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3917 index = self._index 3918 3919 if not self.errors: 3920 try: 3921 if self._parse_select(nested=True): 3922 return this 3923 except ParseError: 3924 pass 3925 finally: 3926 self.errors.clear() 3927 self._retreat(index) 3928 3929 if not self._match(TokenType.L_PAREN): 3930 return this 3931 3932 args = self._parse_csv(lambda: self._parse_constraint() or 
self._parse_field_def()) 3933 3934 self._match_r_paren() 3935 return self.expression(exp.Schema, this=this, expressions=args) 3936 3937 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3938 return self._parse_column_def(self._parse_field(any_token=True)) 3939 3940 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3941 # column defs are not really columns, they're identifiers 3942 if isinstance(this, exp.Column): 3943 this = this.this 3944 3945 kind = self._parse_types(schema=True) 3946 3947 if self._match_text_seq("FOR", "ORDINALITY"): 3948 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3949 3950 constraints: t.List[exp.Expression] = [] 3951 3952 if not kind and self._match(TokenType.ALIAS): 3953 constraints.append( 3954 self.expression( 3955 exp.ComputedColumnConstraint, 3956 this=self._parse_conjunction(), 3957 persisted=self._match_text_seq("PERSISTED"), 3958 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3959 ) 3960 ) 3961 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 3962 self._match(TokenType.ALIAS) 3963 constraints.append( 3964 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 3965 ) 3966 3967 while True: 3968 constraint = self._parse_column_constraint() 3969 if not constraint: 3970 break 3971 constraints.append(constraint) 3972 3973 if not kind and not constraints: 3974 return this 3975 3976 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3977 3978 def _parse_auto_increment( 3979 self, 3980 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3981 start = None 3982 increment = None 3983 3984 if self._match(TokenType.L_PAREN, advance=False): 3985 args = self._parse_wrapped_csv(self._parse_bitwise) 3986 start = seq_get(args, 0) 3987 increment = seq_get(args, 1) 3988 elif self._match_text_seq("START"): 3989 start = self._parse_bitwise() 3990 
self._match_text_seq("INCREMENT") 3991 increment = self._parse_bitwise() 3992 3993 if start and increment: 3994 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3995 3996 return exp.AutoIncrementColumnConstraint() 3997 3998 def _parse_compress(self) -> exp.CompressColumnConstraint: 3999 if self._match(TokenType.L_PAREN, advance=False): 4000 return self.expression( 4001 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4002 ) 4003 4004 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4005 4006 def _parse_generated_as_identity( 4007 self, 4008 ) -> ( 4009 exp.GeneratedAsIdentityColumnConstraint 4010 | exp.ComputedColumnConstraint 4011 | exp.GeneratedAsRowColumnConstraint 4012 ): 4013 if self._match_text_seq("BY", "DEFAULT"): 4014 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4015 this = self.expression( 4016 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4017 ) 4018 else: 4019 self._match_text_seq("ALWAYS") 4020 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4021 4022 self._match(TokenType.ALIAS) 4023 4024 if self._match_text_seq("ROW"): 4025 start = self._match_text_seq("START") 4026 if not start: 4027 self._match(TokenType.END) 4028 hidden = self._match_text_seq("HIDDEN") 4029 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4030 4031 identity = self._match_text_seq("IDENTITY") 4032 4033 if self._match(TokenType.L_PAREN): 4034 if self._match(TokenType.START_WITH): 4035 this.set("start", self._parse_bitwise()) 4036 if self._match_text_seq("INCREMENT", "BY"): 4037 this.set("increment", self._parse_bitwise()) 4038 if self._match_text_seq("MINVALUE"): 4039 this.set("minvalue", self._parse_bitwise()) 4040 if self._match_text_seq("MAXVALUE"): 4041 this.set("maxvalue", self._parse_bitwise()) 4042 4043 if self._match_text_seq("CYCLE"): 4044 this.set("cycle", True) 4045 elif 
self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a generated expression rather than identity options
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric arguments: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse INLINE [LENGTH] <expr>."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT-prefixed constraint: NULL, CASESPECIFIC or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally prefixed by CONSTRAINT <name>.

        Returns just the name (or None) when no known constraint keyword follows.
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            # Dispatch on the matched keyword via the CONSTRAINT_PARSERS table
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed schema constraints when CONSTRAINT is absent."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint can carry several constraint bodies and/or function calls
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint not introduced by CONSTRAINT <name>; None if nothing matches."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns)] [USING <index_type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings (ON <event> <action>, etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> plus any trailing key-constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """One element of a PRIMARY KEY column list; dialects may override this."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        """Parse PERIOD FOR SYSTEM_TIME (start_column, end_column)."""
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint, or as a table-level key when a column list follows."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No parenthesized column list: column-level constraint
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / ARRAY literals and {...} struct literals, recursing for chains."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: slice with implicit start, e.g. x[:n]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: shift the indices by the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a colon follows; otherwise return it unchanged."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def
_parse_case(self) -> t.Optional[exp.Expression]: 4270 ifs = [] 4271 default = None 4272 4273 comments = self._prev_comments 4274 expression = self._parse_conjunction() 4275 4276 while self._match(TokenType.WHEN): 4277 this = self._parse_conjunction() 4278 self._match(TokenType.THEN) 4279 then = self._parse_conjunction() 4280 ifs.append(self.expression(exp.If, this=this, true=then)) 4281 4282 if self._match(TokenType.ELSE): 4283 default = self._parse_conjunction() 4284 4285 if not self._match(TokenType.END): 4286 self.raise_error("Expected END after CASE", self._prev) 4287 4288 return self._parse_window( 4289 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4290 ) 4291 4292 def _parse_if(self) -> t.Optional[exp.Expression]: 4293 if self._match(TokenType.L_PAREN): 4294 args = self._parse_csv(self._parse_conjunction) 4295 this = self.validate_expression(exp.If.from_arg_list(args), args) 4296 self._match_r_paren() 4297 else: 4298 index = self._index - 1 4299 condition = self._parse_conjunction() 4300 4301 if not condition: 4302 self._retreat(index) 4303 return None 4304 4305 self._match(TokenType.THEN) 4306 true = self._parse_conjunction() 4307 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4308 self._match(TokenType.END) 4309 this = self.expression(exp.If, this=condition, true=true, false=false) 4310 4311 return self._parse_window(this) 4312 4313 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4314 if not self._match_text_seq("VALUE", "FOR"): 4315 self._retreat(self._index - 1) 4316 return None 4317 4318 return self.expression( 4319 exp.NextValueFor, 4320 this=self._parse_column(), 4321 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4322 ) 4323 4324 def _parse_extract(self) -> exp.Extract: 4325 this = self._parse_function() or self._parse_var() or self._parse_type() 4326 4327 if self._match(TokenType.FROM): 4328 return self.expression(exp.Extract, 
this=this, expression=self._parse_bitwise()) 4329 4330 if not self._match(TokenType.COMMA): 4331 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4332 4333 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4334 4335 def _parse_any_value(self) -> exp.AnyValue: 4336 this = self._parse_lambda() 4337 is_max = None 4338 having = None 4339 4340 if self._match(TokenType.HAVING): 4341 self._match_texts(("MAX", "MIN")) 4342 is_max = self._prev.text == "MAX" 4343 having = self._parse_column() 4344 4345 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4346 4347 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4348 this = self._parse_conjunction() 4349 4350 if not self._match(TokenType.ALIAS): 4351 if self._match(TokenType.COMMA): 4352 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4353 4354 self.raise_error("Expected AS after CAST") 4355 4356 fmt = None 4357 to = self._parse_types() 4358 4359 if self._match(TokenType.FORMAT): 4360 fmt_string = self._parse_string() 4361 fmt = self._parse_at_time_zone(fmt_string) 4362 4363 if not to: 4364 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4365 if to.this in exp.DataType.TEMPORAL_TYPES: 4366 this = self.expression( 4367 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4368 this=this, 4369 format=exp.Literal.string( 4370 format_time( 4371 fmt_string.this if fmt_string else "", 4372 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4373 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4374 ) 4375 ), 4376 ) 4377 4378 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4379 this.set("zone", fmt.args["zone"]) 4380 return this 4381 elif not to: 4382 self.raise_error("Expected TYPE after CAST") 4383 elif isinstance(to, exp.Identifier): 4384 to = exp.DataType.build(to.name, udt=True) 4385 elif to.this == exp.DataType.Type.CHAR: 4386 if 
self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style aggregates, including the WITHIN GROUP (ORDER BY ...) form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> {USING <charset> | , <type>}) into a (Try)Cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
4447 """ 4448 args = self._parse_csv(self._parse_conjunction) 4449 4450 if len(args) < 3: 4451 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4452 4453 expression, *expressions = args 4454 if not expression: 4455 return None 4456 4457 ifs = [] 4458 for search, result in zip(expressions[::2], expressions[1::2]): 4459 if not search or not result: 4460 return None 4461 4462 if isinstance(search, exp.Literal): 4463 ifs.append( 4464 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4465 ) 4466 elif isinstance(search, exp.Null): 4467 ifs.append( 4468 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4469 ) 4470 else: 4471 cond = exp.or_( 4472 exp.EQ(this=expression.copy(), expression=search), 4473 exp.and_( 4474 exp.Is(this=expression.copy(), expression=exp.Null()), 4475 exp.Is(this=search.copy(), expression=exp.Null()), 4476 copy=False, 4477 ), 4478 copy=False, 4479 ) 4480 ifs.append(exp.If(this=cond, true=result)) 4481 4482 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4483 4484 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4485 self._match_text_seq("KEY") 4486 key = self._parse_column() 4487 self._match_set((TokenType.COLON, TokenType.COMMA)) 4488 self._match_text_seq("VALUE") 4489 value = self._parse_bitwise() 4490 4491 if not key and not value: 4492 return None 4493 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4494 4495 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4496 if not this or not self._match_text_seq("FORMAT", "JSON"): 4497 return this 4498 4499 return self.expression(exp.FormatJson, this=this) 4500 4501 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4502 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4503 for value in values: 4504 if self._match_text_seq(value, "ON", on): 4505 return f"{value} ON {on}" 4506 4507 return None 4508 4509 def _parse_json_object(self) -> exp.JSONObject: 4510 star = self._parse_star() 4511 expressions = ( 4512 [star] 4513 if star 4514 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4515 ) 4516 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4517 4518 unique_keys = None 4519 if self._match_text_seq("WITH", "UNIQUE"): 4520 unique_keys = True 4521 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4522 unique_keys = False 4523 4524 self._match_text_seq("KEYS") 4525 4526 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4527 self._parse_type() 4528 ) 4529 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4530 4531 return self.expression( 4532 exp.JSONObject, 4533 expressions=expressions, 4534 null_handling=null_handling, 4535 unique_keys=unique_keys, 4536 return_type=return_type, 4537 encoding=encoding, 4538 ) 4539 4540 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4541 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4542 if not self._match_text_seq("NESTED"): 4543 this = self._parse_id_var() 4544 kind = self._parse_types(allow_identifiers=False) 4545 nested = None 4546 else: 4547 this = None 4548 kind = None 4549 nested = True 4550 4551 path = self._match_text_seq("PATH") and self._parse_string() 4552 nested_schema = nested and self._parse_json_schema() 4553 4554 return self.expression( 4555 exp.JSONColumnDef, 4556 this=this, 4557 kind=kind, 4558 path=path, 4559 nested_schema=nested_schema, 4560 ) 4561 4562 def _parse_json_schema(self) -> exp.JSONSchema: 4563 self._match_text_seq("COLUMNS") 4564 return self.expression( 4565 exp.JSONSchema, 4566 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4567 ) 4568 4569 def _parse_json_table(self) -> 
exp.JSONTable:
        """Parse the interior of JSON_TABLE(<doc> [, <path>] ... COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(<cols>) AGAINST(<string> [<search modifier>]) (MySQL full-text search)."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(<doc> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(<substr> IN <string>) or the comma-separated argument form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # Argument order differs per dialect; haystack_first selects the convention
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <model>, TABLE <table> [, <params struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        #
https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # FROM form puts the removal pattern first; TRIM_PATTERN_FIRST dialects do too
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse WINDOW <named window> [, ...] when present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named-window definition: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when either modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function tail of `this`: FILTER, WITHIN GROUP, OVER (...), etc.

        With alias=True (named windows), the leading OVER keyword is not required.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name>: reference to a named window
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: {ROWS | RANGE} [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus its PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach an [AS] alias (or parenthesized alias list) to `this`.

        With explicit=True, only an alias preceded by AS is accepted.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, optionally accepting any non-reserved token."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) ->
t.Optional[exp.Expression]:
        # Parse a (raw) string literal token; otherwise fall back to placeholders.
        if self._match_set((TokenType.STRING, TokenType.RAW_STRING)):
            return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal token; otherwise fall back to placeholders."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token; otherwise fall back to placeholders."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens when requested) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or None)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var, else a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL token; otherwise fall back to placeholders."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE / FALSE; otherwise fall back to placeholders."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token; otherwise fall back to placeholders."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped with a colon-separated part."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Dispatch placeholder tokens via PLACEHOLDER_PARSERS, rewinding on a miss."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * EXCEPT (<cols>) / EXCEPT <col>; None when EXCEPT is absent."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse SELECT * REPLACE (<exprs>) / REPLACE <expr>; None when REPLACE is absent."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list with `parse_method`, skipping None results."""
        parse_result =
parse_method() 4967 items = [parse_result] if parse_result is not None else [] 4968 4969 while self._match(sep): 4970 self._add_comments(parse_result) 4971 parse_result = parse_method() 4972 if parse_result is not None: 4973 items.append(parse_result) 4974 4975 return items 4976 4977 def _parse_tokens( 4978 self, parse_method: t.Callable, expressions: t.Dict 4979 ) -> t.Optional[exp.Expression]: 4980 this = parse_method() 4981 4982 while self._match_set(expressions): 4983 this = self.expression( 4984 expressions[self._prev.token_type], 4985 this=this, 4986 comments=self._prev_comments, 4987 expression=parse_method(), 4988 ) 4989 4990 return this 4991 4992 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4993 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4994 4995 def _parse_wrapped_csv( 4996 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4997 ) -> t.List[exp.Expression]: 4998 return self._parse_wrapped( 4999 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5000 ) 5001 5002 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5003 wrapped = self._match(TokenType.L_PAREN) 5004 if not wrapped and not optional: 5005 self.raise_error("Expecting (") 5006 parse_result = parse_method() 5007 if wrapped: 5008 self._match_r_paren() 5009 return parse_result 5010 5011 def _parse_expressions(self) -> t.List[exp.Expression]: 5012 return self._parse_csv(self._parse_expression) 5013 5014 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5015 return self._parse_select() or self._parse_set_operations( 5016 self._parse_expression() if alias else self._parse_conjunction() 5017 ) 5018 5019 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5020 return self._parse_query_modifiers( 5021 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5022 ) 5023 5024 def 
_parse_transaction(self) -> exp.Transaction | exp.Command: 5025 this = None 5026 if self._match_texts(self.TRANSACTION_KIND): 5027 this = self._prev.text 5028 5029 self._match_texts(("TRANSACTION", "WORK")) 5030 5031 modes = [] 5032 while True: 5033 mode = [] 5034 while self._match(TokenType.VAR): 5035 mode.append(self._prev.text) 5036 5037 if mode: 5038 modes.append(" ".join(mode)) 5039 if not self._match(TokenType.COMMA): 5040 break 5041 5042 return self.expression(exp.Transaction, this=this, modes=modes) 5043 5044 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5045 chain = None 5046 savepoint = None 5047 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5048 5049 self._match_texts(("TRANSACTION", "WORK")) 5050 5051 if self._match_text_seq("TO"): 5052 self._match_text_seq("SAVEPOINT") 5053 savepoint = self._parse_id_var() 5054 5055 if self._match(TokenType.AND): 5056 chain = not self._match_text_seq("NO") 5057 self._match_text_seq("CHAIN") 5058 5059 if is_rollback: 5060 return self.expression(exp.Rollback, savepoint=savepoint) 5061 5062 return self.expression(exp.Commit, chain=chain) 5063 5064 def _parse_refresh(self) -> exp.Refresh: 5065 self._match(TokenType.TABLE) 5066 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5067 5068 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5069 if not self._match_text_seq("ADD"): 5070 return None 5071 5072 self._match(TokenType.COLUMN) 5073 exists_column = self._parse_exists(not_=True) 5074 expression = self._parse_field_def() 5075 5076 if expression: 5077 expression.set("exists", exists_column) 5078 5079 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5080 if self._match_texts(("FIRST", "AFTER")): 5081 position = self._prev.text 5082 column_position = self.expression( 5083 exp.ColumnPosition, this=self._parse_column(), position=position 5084 ) 5085 expression.set("position", column_position) 5086 5087 
return expression 5088 5089 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5090 drop = self._match(TokenType.DROP) and self._parse_drop() 5091 if drop and not isinstance(drop, exp.Command): 5092 drop.set("kind", drop.args.get("kind", "COLUMN")) 5093 return drop 5094 5095 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5096 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5097 return self.expression( 5098 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5099 ) 5100 5101 def _parse_add_constraint(self) -> exp.AddConstraint: 5102 this = None 5103 kind = self._prev.token_type 5104 5105 if kind == TokenType.CONSTRAINT: 5106 this = self._parse_id_var() 5107 5108 if self._match_text_seq("CHECK"): 5109 expression = self._parse_wrapped(self._parse_conjunction) 5110 enforced = self._match_text_seq("ENFORCED") 5111 5112 return self.expression( 5113 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5114 ) 5115 5116 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5117 expression = self._parse_foreign_key() 5118 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5119 expression = self._parse_primary_key() 5120 else: 5121 expression = None 5122 5123 return self.expression(exp.AddConstraint, this=this, expression=expression) 5124 5125 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5126 index = self._index - 1 5127 5128 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5129 return self._parse_csv(self._parse_add_constraint) 5130 5131 self._retreat(index) 5132 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5133 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5134 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5135 5136 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5137 
self._match(TokenType.COLUMN) 5138 column = self._parse_field(any_token=True) 5139 5140 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5141 return self.expression(exp.AlterColumn, this=column, drop=True) 5142 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5143 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5144 5145 self._match_text_seq("SET", "DATA") 5146 return self.expression( 5147 exp.AlterColumn, 5148 this=column, 5149 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5150 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5151 using=self._match(TokenType.USING) and self._parse_conjunction(), 5152 ) 5153 5154 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5155 index = self._index - 1 5156 5157 partition_exists = self._parse_exists() 5158 if self._match(TokenType.PARTITION, advance=False): 5159 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5160 5161 self._retreat(index) 5162 return self._parse_csv(self._parse_drop_column) 5163 5164 def _parse_alter_table_rename(self) -> exp.RenameTable: 5165 self._match_text_seq("TO") 5166 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5167 5168 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5169 start = self._prev 5170 5171 if not self._match(TokenType.TABLE): 5172 return self._parse_as_command(start) 5173 5174 exists = self._parse_exists() 5175 only = self._match_text_seq("ONLY") 5176 this = self._parse_table(schema=True) 5177 5178 if self._next: 5179 self._advance() 5180 5181 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5182 if parser: 5183 actions = ensure_list(parser(self)) 5184 5185 if not self._curr: 5186 return self.expression( 5187 exp.AlterTable, 5188 this=this, 5189 exists=exists, 5190 actions=actions, 5191 only=only, 5192 ) 5193 5194 return self._parse_as_command(start) 5195 5196 def _parse_merge(self) -> 
exp.Merge: 5197 self._match(TokenType.INTO) 5198 target = self._parse_table() 5199 5200 if target and self._match(TokenType.ALIAS, advance=False): 5201 target.set("alias", self._parse_table_alias()) 5202 5203 self._match(TokenType.USING) 5204 using = self._parse_table() 5205 5206 self._match(TokenType.ON) 5207 on = self._parse_conjunction() 5208 5209 return self.expression( 5210 exp.Merge, 5211 this=target, 5212 using=using, 5213 on=on, 5214 expressions=self._parse_when_matched(), 5215 ) 5216 5217 def _parse_when_matched(self) -> t.List[exp.When]: 5218 whens = [] 5219 5220 while self._match(TokenType.WHEN): 5221 matched = not self._match(TokenType.NOT) 5222 self._match_text_seq("MATCHED") 5223 source = ( 5224 False 5225 if self._match_text_seq("BY", "TARGET") 5226 else self._match_text_seq("BY", "SOURCE") 5227 ) 5228 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5229 5230 self._match(TokenType.THEN) 5231 5232 if self._match(TokenType.INSERT): 5233 _this = self._parse_star() 5234 if _this: 5235 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5236 else: 5237 then = self.expression( 5238 exp.Insert, 5239 this=self._parse_value(), 5240 expression=self._match(TokenType.VALUES) and self._parse_value(), 5241 ) 5242 elif self._match(TokenType.UPDATE): 5243 expressions = self._parse_star() 5244 if expressions: 5245 then = self.expression(exp.Update, expressions=expressions) 5246 else: 5247 then = self.expression( 5248 exp.Update, 5249 expressions=self._match(TokenType.SET) 5250 and self._parse_csv(self._parse_equality), 5251 ) 5252 elif self._match(TokenType.DELETE): 5253 then = self.expression(exp.Var, this=self._prev.text) 5254 else: 5255 then = None 5256 5257 whens.append( 5258 self.expression( 5259 exp.When, 5260 matched=matched, 5261 source=source, 5262 condition=condition, 5263 then=then, 5264 ) 5265 ) 5266 return whens 5267 5268 def _parse_show(self) -> t.Optional[exp.Expression]: 5269 parser = 
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5270 if parser: 5271 return parser(self) 5272 return self._parse_as_command(self._prev) 5273 5274 def _parse_set_item_assignment( 5275 self, kind: t.Optional[str] = None 5276 ) -> t.Optional[exp.Expression]: 5277 index = self._index 5278 5279 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5280 return self._parse_set_transaction(global_=kind == "GLOBAL") 5281 5282 left = self._parse_primary() or self._parse_id_var() 5283 assignment_delimiter = self._match_texts(("=", "TO")) 5284 5285 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5286 self._retreat(index) 5287 return None 5288 5289 right = self._parse_statement() or self._parse_id_var() 5290 this = self.expression(exp.EQ, this=left, expression=right) 5291 5292 return self.expression(exp.SetItem, this=this, kind=kind) 5293 5294 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5295 self._match_text_seq("TRANSACTION") 5296 characteristics = self._parse_csv( 5297 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5298 ) 5299 return self.expression( 5300 exp.SetItem, 5301 expressions=characteristics, 5302 kind="TRANSACTION", 5303 **{"global": global_}, # type: ignore 5304 ) 5305 5306 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5307 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5308 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5309 5310 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5311 index = self._index 5312 set_ = self.expression( 5313 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5314 ) 5315 5316 if self._curr: 5317 self._retreat(index) 5318 return self._parse_as_command(self._prev) 5319 5320 return set_ 5321 5322 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5323 for option in 
options: 5324 if self._match_text_seq(*option.split(" ")): 5325 return exp.var(option) 5326 return None 5327 5328 def _parse_as_command(self, start: Token) -> exp.Command: 5329 while self._curr: 5330 self._advance() 5331 text = self._find_sql(start, self._prev) 5332 size = len(start.text) 5333 return exp.Command(this=text[:size], expression=text[size:]) 5334 5335 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5336 settings = [] 5337 5338 self._match_l_paren() 5339 kind = self._parse_id_var() 5340 5341 if self._match(TokenType.L_PAREN): 5342 while True: 5343 key = self._parse_id_var() 5344 value = self._parse_primary() 5345 5346 if not key and value is None: 5347 break 5348 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5349 self._match(TokenType.R_PAREN) 5350 5351 self._match_r_paren() 5352 5353 return self.expression( 5354 exp.DictProperty, 5355 this=this, 5356 kind=kind.this if kind else None, 5357 settings=settings, 5358 ) 5359 5360 def _parse_dict_range(self, this: str) -> exp.DictRange: 5361 self._match_l_paren() 5362 has_min = self._match_text_seq("MIN") 5363 if has_min: 5364 min = self._parse_var() or self._parse_primary() 5365 self._match_text_seq("MAX") 5366 max = self._parse_var() or self._parse_primary() 5367 else: 5368 max = self._parse_var() or self._parse_primary() 5369 min = exp.Literal.number(0) 5370 self._match_r_paren() 5371 return self.expression(exp.DictRange, this=this, min=min, max=max) 5372 5373 def _parse_comprehension( 5374 self, this: t.Optional[exp.Expression] 5375 ) -> t.Optional[exp.Comprehension]: 5376 index = self._index 5377 expression = self._parse_column() 5378 if not self._match(TokenType.IN): 5379 self._retreat(index - 1) 5380 return None 5381 iterator = self._parse_column() 5382 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5383 return self.expression( 5384 exp.Comprehension, 5385 this=this, 5386 expression=expression, 5387 iterator=iterator, 5388 
condition=condition, 5389 ) 5390 5391 def _find_parser( 5392 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5393 ) -> t.Optional[t.Callable]: 5394 if not self._curr: 5395 return None 5396 5397 index = self._index 5398 this = [] 5399 while True: 5400 # The current token might be multiple words 5401 curr = self._curr.text.upper() 5402 key = curr.split(" ") 5403 this.append(curr) 5404 5405 self._advance() 5406 result, trie = in_trie(trie, key) 5407 if result == TrieResult.FAILED: 5408 break 5409 5410 if result == TrieResult.EXISTS: 5411 subparser = parsers[" ".join(this)] 5412 return subparser 5413 5414 self._retreat(index) 5415 return None 5416 5417 def _match(self, token_type, advance=True, expression=None): 5418 if not self._curr: 5419 return None 5420 5421 if self._curr.token_type == token_type: 5422 if advance: 5423 self._advance() 5424 self._add_comments(expression) 5425 return True 5426 5427 return None 5428 5429 def _match_set(self, types, advance=True): 5430 if not self._curr: 5431 return None 5432 5433 if self._curr.token_type in types: 5434 if advance: 5435 self._advance() 5436 return True 5437 5438 return None 5439 5440 def _match_pair(self, token_type_a, token_type_b, advance=True): 5441 if not self._curr or not self._next: 5442 return None 5443 5444 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5445 if advance: 5446 self._advance(2) 5447 return True 5448 5449 return None 5450 5451 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5452 if not self._match(TokenType.L_PAREN, expression=expression): 5453 self.raise_error("Expecting (") 5454 5455 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5456 if not self._match(TokenType.R_PAREN, expression=expression): 5457 self.raise_error("Expecting )") 5458 5459 def _match_texts(self, texts, advance=True): 5460 if self._curr and self._curr.text.upper() in texts: 5461 if advance: 5462 self._advance() 5463 
return True 5464 return False 5465 5466 def _match_text_seq(self, *texts, advance=True): 5467 index = self._index 5468 for text in texts: 5469 if self._curr and self._curr.text.upper() == text: 5470 self._advance() 5471 else: 5472 self._retreat(index) 5473 return False 5474 5475 if not advance: 5476 self._retreat(index) 5477 5478 return True 5479 5480 @t.overload 5481 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5482 ... 5483 5484 @t.overload 5485 def _replace_columns_with_dots( 5486 self, this: t.Optional[exp.Expression] 5487 ) -> t.Optional[exp.Expression]: 5488 ... 5489 5490 def _replace_columns_with_dots(self, this): 5491 if isinstance(this, exp.Dot): 5492 exp.replace_children(this, self._replace_columns_with_dots) 5493 elif isinstance(this, exp.Column): 5494 exp.replace_children(this, self._replace_columns_with_dots) 5495 table = this.args.get("table") 5496 this = ( 5497 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5498 ) 5499 5500 return this 5501 5502 def _replace_lambda( 5503 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5504 ) -> t.Optional[exp.Expression]: 5505 if not node: 5506 return node 5507 5508 for column in node.find_all(exp.Column): 5509 if column.parts[0].name in lambda_variables: 5510 dot_or_id = column.to_dot() if column.table else column.this 5511 parent = column.parent 5512 5513 while isinstance(parent, exp.Dot): 5514 if not isinstance(parent.parent, exp.Dot): 5515 parent.replace(dot_or_id) 5516 break 5517 parent = parent.parent 5518 else: 5519 if column is node: 5520 node = dot_or_id 5521 else: 5522 column.replace(dot_or_id) 5523 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from alternating key/value arguments.

    A single star argument (e.g. ``VAR_MAP(*)``) yields a StarMap instead.
    An odd number of arguments raises IndexError, same as pairing by index.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate key, value, key, value, ...
    keys = []
    values = []
    pairs = ((args[i], args[i + 1]) for i in range(0, len(args), 2))
    for key, value in pairs:
        keys.append(key)
        values.append(value)

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a LOG/LN expression, honoring the dialect's argument order.

    With two arguments the default order is (base, expression); dialects with
    ``LOG_BASE_FIRST`` unset expect (expression, base) and are swapped here.
    With one argument, dialects with ``LOG_DEFAULTS_TO_LN`` produce Ln.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: LOG(x) or LN(x), depending on the dialect.
        klass = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return klass(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)
    return exp.Log(this=second, expression=first)
74class Parser(metaclass=_Parser): 75 """ 76 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 77 78 Args: 79 error_level: The desired error level. 80 Default: ErrorLevel.IMMEDIATE 81 error_message_context: Determines the amount of context to capture from a 82 query string when displaying the error message (in number of characters). 83 Default: 100 84 max_errors: Maximum number of error messages to include in a raised ParseError. 85 This is only relevant if error_level is ErrorLevel.RAISE. 86 Default: 3 87 """ 88 89 FUNCTIONS: t.Dict[str, t.Callable] = { 90 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 91 "CONCAT": lambda args, dialect: exp.Concat( 92 expressions=args, 93 safe=not dialect.STRICT_STRING_CONCAT, 94 coalesce=dialect.CONCAT_COALESCE, 95 ), 96 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 97 expressions=args, 98 safe=not dialect.STRICT_STRING_CONCAT, 99 coalesce=dialect.CONCAT_COALESCE, 100 ), 101 "DATE_TO_DATE_STR": lambda args: exp.Cast( 102 this=seq_get(args, 0), 103 to=exp.DataType(this=exp.DataType.Type.TEXT), 104 ), 105 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 106 "LIKE": parse_like, 107 "LOG": parse_logarithm, 108 "TIME_TO_TIME_STR": lambda args: exp.Cast( 109 this=seq_get(args, 0), 110 to=exp.DataType(this=exp.DataType.Type.TEXT), 111 ), 112 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 113 this=exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 start=exp.Literal.number(1), 118 length=exp.Literal.number(10), 119 ), 120 "VAR_MAP": parse_var_map, 121 } 122 123 NO_PAREN_FUNCTIONS = { 124 TokenType.CURRENT_DATE: exp.CurrentDate, 125 TokenType.CURRENT_DATETIME: exp.CurrentDate, 126 TokenType.CURRENT_TIME: exp.CurrentTime, 127 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 128 TokenType.CURRENT_USER: exp.CurrentUser, 129 } 130 131 STRUCT_TYPE_TOKENS = { 132 TokenType.NESTED, 
133 TokenType.STRUCT, 134 } 135 136 NESTED_TYPE_TOKENS = { 137 TokenType.ARRAY, 138 TokenType.LOWCARDINALITY, 139 TokenType.MAP, 140 TokenType.NULLABLE, 141 *STRUCT_TYPE_TOKENS, 142 } 143 144 ENUM_TYPE_TOKENS = { 145 TokenType.ENUM, 146 TokenType.ENUM8, 147 TokenType.ENUM16, 148 } 149 150 TYPE_TOKENS = { 151 TokenType.BIT, 152 TokenType.BOOLEAN, 153 TokenType.TINYINT, 154 TokenType.UTINYINT, 155 TokenType.SMALLINT, 156 TokenType.USMALLINT, 157 TokenType.INT, 158 TokenType.UINT, 159 TokenType.BIGINT, 160 TokenType.UBIGINT, 161 TokenType.INT128, 162 TokenType.UINT128, 163 TokenType.INT256, 164 TokenType.UINT256, 165 TokenType.MEDIUMINT, 166 TokenType.UMEDIUMINT, 167 TokenType.FIXEDSTRING, 168 TokenType.FLOAT, 169 TokenType.DOUBLE, 170 TokenType.CHAR, 171 TokenType.NCHAR, 172 TokenType.VARCHAR, 173 TokenType.NVARCHAR, 174 TokenType.TEXT, 175 TokenType.MEDIUMTEXT, 176 TokenType.LONGTEXT, 177 TokenType.MEDIUMBLOB, 178 TokenType.LONGBLOB, 179 TokenType.BINARY, 180 TokenType.VARBINARY, 181 TokenType.JSON, 182 TokenType.JSONB, 183 TokenType.INTERVAL, 184 TokenType.TINYBLOB, 185 TokenType.TINYTEXT, 186 TokenType.TIME, 187 TokenType.TIMETZ, 188 TokenType.TIMESTAMP, 189 TokenType.TIMESTAMP_S, 190 TokenType.TIMESTAMP_MS, 191 TokenType.TIMESTAMP_NS, 192 TokenType.TIMESTAMPTZ, 193 TokenType.TIMESTAMPLTZ, 194 TokenType.DATETIME, 195 TokenType.DATETIME64, 196 TokenType.DATE, 197 TokenType.INT4RANGE, 198 TokenType.INT4MULTIRANGE, 199 TokenType.INT8RANGE, 200 TokenType.INT8MULTIRANGE, 201 TokenType.NUMRANGE, 202 TokenType.NUMMULTIRANGE, 203 TokenType.TSRANGE, 204 TokenType.TSMULTIRANGE, 205 TokenType.TSTZRANGE, 206 TokenType.TSTZMULTIRANGE, 207 TokenType.DATERANGE, 208 TokenType.DATEMULTIRANGE, 209 TokenType.DECIMAL, 210 TokenType.UDECIMAL, 211 TokenType.BIGDECIMAL, 212 TokenType.UUID, 213 TokenType.GEOGRAPHY, 214 TokenType.GEOMETRY, 215 TokenType.HLLSKETCH, 216 TokenType.HSTORE, 217 TokenType.PSEUDO_TYPE, 218 TokenType.SUPER, 219 TokenType.SERIAL, 220 TokenType.SMALLSERIAL, 221 
TokenType.BIGSERIAL, 222 TokenType.XML, 223 TokenType.YEAR, 224 TokenType.UNIQUEIDENTIFIER, 225 TokenType.USERDEFINED, 226 TokenType.MONEY, 227 TokenType.SMALLMONEY, 228 TokenType.ROWVERSION, 229 TokenType.IMAGE, 230 TokenType.VARIANT, 231 TokenType.OBJECT, 232 TokenType.OBJECT_IDENTIFIER, 233 TokenType.INET, 234 TokenType.IPADDRESS, 235 TokenType.IPPREFIX, 236 TokenType.UNKNOWN, 237 TokenType.NULL, 238 *ENUM_TYPE_TOKENS, 239 *NESTED_TYPE_TOKENS, 240 } 241 242 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 243 TokenType.BIGINT: TokenType.UBIGINT, 244 TokenType.INT: TokenType.UINT, 245 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 246 TokenType.SMALLINT: TokenType.USMALLINT, 247 TokenType.TINYINT: TokenType.UTINYINT, 248 TokenType.DECIMAL: TokenType.UDECIMAL, 249 } 250 251 SUBQUERY_PREDICATES = { 252 TokenType.ANY: exp.Any, 253 TokenType.ALL: exp.All, 254 TokenType.EXISTS: exp.Exists, 255 TokenType.SOME: exp.Any, 256 } 257 258 RESERVED_TOKENS = { 259 *Tokenizer.SINGLE_TOKENS.values(), 260 TokenType.SELECT, 261 } 262 263 DB_CREATABLES = { 264 TokenType.DATABASE, 265 TokenType.SCHEMA, 266 TokenType.TABLE, 267 TokenType.VIEW, 268 TokenType.MODEL, 269 TokenType.DICTIONARY, 270 } 271 272 CREATABLES = { 273 TokenType.COLUMN, 274 TokenType.CONSTRAINT, 275 TokenType.FUNCTION, 276 TokenType.INDEX, 277 TokenType.PROCEDURE, 278 TokenType.FOREIGN_KEY, 279 *DB_CREATABLES, 280 } 281 282 # Tokens that can represent identifiers 283 ID_VAR_TOKENS = { 284 TokenType.VAR, 285 TokenType.ANTI, 286 TokenType.APPLY, 287 TokenType.ASC, 288 TokenType.AUTO_INCREMENT, 289 TokenType.BEGIN, 290 TokenType.CACHE, 291 TokenType.CASE, 292 TokenType.COLLATE, 293 TokenType.COMMAND, 294 TokenType.COMMENT, 295 TokenType.COMMIT, 296 TokenType.CONSTRAINT, 297 TokenType.DEFAULT, 298 TokenType.DELETE, 299 TokenType.DESC, 300 TokenType.DESCRIBE, 301 TokenType.DICTIONARY, 302 TokenType.DIV, 303 TokenType.END, 304 TokenType.EXECUTE, 305 TokenType.ESCAPE, 306 TokenType.FALSE, 307 TokenType.FIRST, 308 TokenType.FILTER, 309 
TokenType.FORMAT, 310 TokenType.FULL, 311 TokenType.IS, 312 TokenType.ISNULL, 313 TokenType.INTERVAL, 314 TokenType.KEEP, 315 TokenType.KILL, 316 TokenType.LEFT, 317 TokenType.LOAD, 318 TokenType.MERGE, 319 TokenType.NATURAL, 320 TokenType.NEXT, 321 TokenType.OFFSET, 322 TokenType.OPERATOR, 323 TokenType.ORDINALITY, 324 TokenType.OVERLAPS, 325 TokenType.OVERWRITE, 326 TokenType.PARTITION, 327 TokenType.PERCENT, 328 TokenType.PIVOT, 329 TokenType.PRAGMA, 330 TokenType.RANGE, 331 TokenType.RECURSIVE, 332 TokenType.REFERENCES, 333 TokenType.REFRESH, 334 TokenType.REPLACE, 335 TokenType.RIGHT, 336 TokenType.ROW, 337 TokenType.ROWS, 338 TokenType.SEMI, 339 TokenType.SET, 340 TokenType.SETTINGS, 341 TokenType.SHOW, 342 TokenType.TEMPORARY, 343 TokenType.TOP, 344 TokenType.TRUE, 345 TokenType.UNIQUE, 346 TokenType.UNPIVOT, 347 TokenType.UPDATE, 348 TokenType.USE, 349 TokenType.VOLATILE, 350 TokenType.WINDOW, 351 *CREATABLES, 352 *SUBQUERY_PREDICATES, 353 *TYPE_TOKENS, 354 *NO_PAREN_FUNCTIONS, 355 } 356 357 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 358 359 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 360 TokenType.ANTI, 361 TokenType.APPLY, 362 TokenType.ASOF, 363 TokenType.FULL, 364 TokenType.LEFT, 365 TokenType.LOCK, 366 TokenType.NATURAL, 367 TokenType.OFFSET, 368 TokenType.RIGHT, 369 TokenType.SEMI, 370 TokenType.WINDOW, 371 } 372 373 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 374 375 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 376 377 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 378 379 FUNC_TOKENS = { 380 TokenType.COLLATE, 381 TokenType.COMMAND, 382 TokenType.CURRENT_DATE, 383 TokenType.CURRENT_DATETIME, 384 TokenType.CURRENT_TIMESTAMP, 385 TokenType.CURRENT_TIME, 386 TokenType.CURRENT_USER, 387 TokenType.FILTER, 388 TokenType.FIRST, 389 TokenType.FORMAT, 390 TokenType.GLOB, 391 TokenType.IDENTIFIER, 392 TokenType.INDEX, 393 TokenType.ISNULL, 394 TokenType.ILIKE, 395 TokenType.INSERT, 396 TokenType.LIKE, 397 TokenType.MERGE, 398 
TokenType.OFFSET, 399 TokenType.PRIMARY_KEY, 400 TokenType.RANGE, 401 TokenType.REPLACE, 402 TokenType.RLIKE, 403 TokenType.ROW, 404 TokenType.UNNEST, 405 TokenType.VAR, 406 TokenType.LEFT, 407 TokenType.RIGHT, 408 TokenType.DATE, 409 TokenType.DATETIME, 410 TokenType.TABLE, 411 TokenType.TIMESTAMP, 412 TokenType.TIMESTAMPTZ, 413 TokenType.WINDOW, 414 TokenType.XOR, 415 *TYPE_TOKENS, 416 *SUBQUERY_PREDICATES, 417 } 418 419 CONJUNCTION = { 420 TokenType.AND: exp.And, 421 TokenType.OR: exp.Or, 422 } 423 424 EQUALITY = { 425 TokenType.COLON_EQ: exp.PropertyEQ, 426 TokenType.EQ: exp.EQ, 427 TokenType.NEQ: exp.NEQ, 428 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 429 } 430 431 COMPARISON = { 432 TokenType.GT: exp.GT, 433 TokenType.GTE: exp.GTE, 434 TokenType.LT: exp.LT, 435 TokenType.LTE: exp.LTE, 436 } 437 438 BITWISE = { 439 TokenType.AMP: exp.BitwiseAnd, 440 TokenType.CARET: exp.BitwiseXor, 441 TokenType.PIPE: exp.BitwiseOr, 442 } 443 444 TERM = { 445 TokenType.DASH: exp.Sub, 446 TokenType.PLUS: exp.Add, 447 TokenType.MOD: exp.Mod, 448 TokenType.COLLATE: exp.Collate, 449 } 450 451 FACTOR = { 452 TokenType.DIV: exp.IntDiv, 453 TokenType.LR_ARROW: exp.Distance, 454 TokenType.SLASH: exp.Div, 455 TokenType.STAR: exp.Mul, 456 } 457 458 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 459 460 TIMES = { 461 TokenType.TIME, 462 TokenType.TIMETZ, 463 } 464 465 TIMESTAMPS = { 466 TokenType.TIMESTAMP, 467 TokenType.TIMESTAMPTZ, 468 TokenType.TIMESTAMPLTZ, 469 *TIMES, 470 } 471 472 SET_OPERATIONS = { 473 TokenType.UNION, 474 TokenType.INTERSECT, 475 TokenType.EXCEPT, 476 } 477 478 JOIN_METHODS = { 479 TokenType.NATURAL, 480 TokenType.ASOF, 481 } 482 483 JOIN_SIDES = { 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.FULL, 487 } 488 489 JOIN_KINDS = { 490 TokenType.INNER, 491 TokenType.OUTER, 492 TokenType.CROSS, 493 TokenType.SEMI, 494 TokenType.ANTI, 495 } 496 497 JOIN_HINTS: t.Set[str] = set() 498 499 LAMBDAS = { 500 TokenType.ARROW: lambda self, expressions: 
        self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can follow a column/primary expression, e.g. `x::INT` or
    # `col -> 'path'`. A value of None means the token is handled inline by the
    # column parser itself (DOT builds qualified/dotted names).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps a target Expression type to the parser method used by `parse_into`.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement dispatch keyed by the statement's leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary token parsers; each callable receives the matched token
    # (parsers that don't need it take `_`).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _:
        self._parse_session_parameter(),
    }

    # Placeholder tokens: `?`, `@param`, and `:name` / `:1` style bind variables.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, LIKE, IS, ...); each receives the
    # already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property parsers, keyed by the upper-cased property keyword.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed by the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self:
        self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE sub-command parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a preceding CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Functions parsed without parentheses, e.g. CASE ... END.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument syntax is special enough to need a dedicated parser.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier parsers; each returns a (modifier_key, expression) pair.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement item parsers, keyed by scope keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = \
{TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether `CAST` / `::` produce exp.Cast (strict) rather than exp.TryCast.
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated into a raised ParseError.
            dialect: Dialect name/class/instance, resolved via Dialect.get_or_raise.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
1035 """ 1036 errors = [] 1037 for expression_type in ensure_list(expression_types): 1038 parser = self.EXPRESSION_PARSERS.get(expression_type) 1039 if not parser: 1040 raise TypeError(f"No parser registered for {expression_type}") 1041 1042 try: 1043 return self._parse(parser, raw_tokens, sql) 1044 except ParseError as e: 1045 e.errors[0]["into_expression"] = expression_type 1046 errors.append(e) 1047 1048 raise ParseError( 1049 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1050 errors=merge_errors(errors), 1051 ) from errors[-1] 1052 1053 def _parse( 1054 self, 1055 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1056 raw_tokens: t.List[Token], 1057 sql: t.Optional[str] = None, 1058 ) -> t.List[t.Optional[exp.Expression]]: 1059 self.reset() 1060 self.sql = sql or "" 1061 1062 total = len(raw_tokens) 1063 chunks: t.List[t.List[Token]] = [[]] 1064 1065 for i, token in enumerate(raw_tokens): 1066 if token.token_type == TokenType.SEMICOLON: 1067 if i < total - 1: 1068 chunks.append([]) 1069 else: 1070 chunks[-1].append(token) 1071 1072 expressions = [] 1073 1074 for tokens in chunks: 1075 self._index = -1 1076 self._tokens = tokens 1077 self._advance() 1078 1079 expressions.append(parse_method(self)) 1080 1081 if self._index < len(self._tokens): 1082 self.raise_error("Invalid expression / Unexpected token") 1083 1084 self.check_errors() 1085 1086 return expressions 1087 1088 def check_errors(self) -> None: 1089 """Logs or raises any found errors, depending on the chosen error level setting.""" 1090 if self.error_level == ErrorLevel.WARN: 1091 for error in self.errors: 1092 logger.error(str(error)) 1093 elif self.error_level == ErrorLevel.RAISE and self.errors: 1094 raise ParseError( 1095 concat_messages(self.errors, self.max_errors), 1096 errors=merge_errors(self.errors), 1097 ) 1098 1099 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1100 """ 1101 Appends an error in the list of recorded errors or 
raises it, depending on the chosen 1102 error level setting. 1103 """ 1104 token = token or self._curr or self._prev or Token.string("") 1105 start = token.start 1106 end = token.end + 1 1107 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1108 highlight = self.sql[start:end] 1109 end_context = self.sql[end : end + self.error_message_context] 1110 1111 error = ParseError.new( 1112 f"{message}. Line {token.line}, Col: {token.col}.\n" 1113 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1114 description=message, 1115 line=token.line, 1116 col=token.col, 1117 start_context=start_context, 1118 highlight=highlight, 1119 end_context=end_context, 1120 ) 1121 1122 if self.error_level == ErrorLevel.IMMEDIATE: 1123 raise error 1124 1125 self.errors.append(error) 1126 1127 def expression( 1128 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1129 ) -> E: 1130 """ 1131 Creates a new, validated Expression. 1132 1133 Args: 1134 exp_class: The expression class to instantiate. 1135 comments: An optional list of comments to attach to the expression. 1136 kwargs: The arguments to set for the expression along with their respective values. 1137 1138 Returns: 1139 The target expression. 1140 """ 1141 instance = exp_class(**kwargs) 1142 instance.add_comments(comments) if comments else self._add_comments(instance) 1143 return self.validate_expression(instance) 1144 1145 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1146 if expression and self._prev_comments: 1147 expression.add_comments(self._prev_comments) 1148 self._prev_comments = None 1149 1150 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1151 """ 1152 Validates an Expression, making sure that all its mandatory arguments are set. 1153 1154 Args: 1155 expression: The expression to validate. 1156 args: An optional list of items that was used to instantiate the expression, if it's a Func. 
1157 1158 Returns: 1159 The validated expression. 1160 """ 1161 if self.error_level != ErrorLevel.IGNORE: 1162 for error_message in expression.error_messages(args): 1163 self.raise_error(error_message) 1164 1165 return expression 1166 1167 def _find_sql(self, start: Token, end: Token) -> str: 1168 return self.sql[start.start : end.end + 1] 1169 1170 def _is_connected(self) -> bool: 1171 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1172 1173 def _advance(self, times: int = 1) -> None: 1174 self._index += times 1175 self._curr = seq_get(self._tokens, self._index) 1176 self._next = seq_get(self._tokens, self._index + 1) 1177 1178 if self._index > 0: 1179 self._prev = self._tokens[self._index - 1] 1180 self._prev_comments = self._prev.comments 1181 else: 1182 self._prev = None 1183 self._prev_comments = None 1184 1185 def _retreat(self, index: int) -> None: 1186 if index != self._index: 1187 self._advance(index - self._index) 1188 1189 def _parse_command(self) -> exp.Command: 1190 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1191 1192 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1193 start = self._prev 1194 exists = self._parse_exists() if allow_exists else None 1195 1196 self._match(TokenType.ON) 1197 1198 kind = self._match_set(self.CREATABLES) and self._prev 1199 if not kind: 1200 return self._parse_as_command(start) 1201 1202 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1203 this = self._parse_user_defined_function(kind=kind.token_type) 1204 elif kind.token_type == TokenType.TABLE: 1205 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1206 elif kind.token_type == TokenType.COLUMN: 1207 this = self._parse_column() 1208 else: 1209 this = self._parse_id_var() 1210 1211 self._match(TokenType.IS) 1212 1213 return self.expression( 1214 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1215 ) 

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a TO <table> clause into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause: a CSV of TTL actions plus optional
        WHERE / GROUP BY [SET <aggregates>]."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one top-level statement: dispatch to a registered statement parser,
        then to raw commands, then fall back to an expression/SELECT."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; unrecognized targets degrade to a raw Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement for any creatable kind (table, view, function, ...);
        falls back to a raw Command when the target can't be recognized."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate property lists parsed at the various clause positions.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that carries leading modifiers (NO/DUAL/BEFORE/...) and
        forward the matched flags to the registered property parser."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser for this keyword doesn't accept the matched modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single DDL property, returning None if nothing matched."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a `key = value` property; rewind so the tokens can be re-parsed.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a STORED AS clause, including INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Parses `[=] [AS] <field>` into the given property expression class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into one exp.Properties node, or None if none matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE (table kind vs. stability marker) by looking two tokens back."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        # SYSTEM_VERSIONING = ON [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause that follows WITH in a property list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; None when either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF [DEFAULT]; `on` stays None if neither matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY not followed by GRANTS: give the COPY token back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns [] (not None) when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...), or
        WITH (MODULUS <n>, REMAINDER <n>); raises on anything else."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> {DEFAULT | FOR VALUES <bound spec>}."""
        if not self._match_text_seq("OF"):
            # PARTITION not followed by OF: give the PARTITION token back.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # [AND [NO] STATISTICS]; `statistics` stays None when absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse the clause following ON: COMMIT {PRESERVE|DELETE} ROWS or a generic target."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option>]* in a CREATE statement."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: scalar type, TABLE schema, or TABLE<...> struct form."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including OVERWRITE/IGNORE, DIRECTORY targets,
        partitions, ON CONFLICT and RETURNING clauses."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT SERDE '...' or ROW FORMAT DELIMITED ... clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE; anything else degrades to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (target, SET list, FROM/WHERE/RETURNING/ORDER/LIMIT)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS(<k> = <v>)] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row — either a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
2166 # https://prestodb.io/docs/current/sql/values.html 2167 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2168 2169 def _parse_projections(self) -> t.List[exp.Expression]: 2170 return self._parse_expressions() 2171 2172 def _parse_select( 2173 self, 2174 nested: bool = False, 2175 table: bool = False, 2176 parse_subquery_alias: bool = True, 2177 parse_set_operation: bool = True, 2178 ) -> t.Optional[exp.Expression]: 2179 cte = self._parse_with() 2180 2181 if cte: 2182 this = self._parse_statement() 2183 2184 if not this: 2185 self.raise_error("Failed to parse any statement following CTE") 2186 return cte 2187 2188 if "with" in this.arg_types: 2189 this.set("with", cte) 2190 else: 2191 self.raise_error(f"{this.key} does not support CTE") 2192 this = cte 2193 2194 return this 2195 2196 # duckdb supports leading with FROM x 2197 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2198 2199 if self._match(TokenType.SELECT): 2200 comments = self._prev_comments 2201 2202 hint = self._parse_hint() 2203 all_ = self._match(TokenType.ALL) 2204 distinct = self._match_set(self.DISTINCT_TOKENS) 2205 2206 kind = ( 2207 self._match(TokenType.ALIAS) 2208 and self._match_texts(("STRUCT", "VALUE")) 2209 and self._prev.text 2210 ) 2211 2212 if distinct: 2213 distinct = self.expression( 2214 exp.Distinct, 2215 on=self._parse_value() if self._match(TokenType.ON) else None, 2216 ) 2217 2218 if all_ and distinct: 2219 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2220 2221 limit = self._parse_limit(top=True) 2222 projections = self._parse_projections() 2223 2224 this = self.expression( 2225 exp.Select, 2226 kind=kind, 2227 hint=hint, 2228 distinct=distinct, 2229 expressions=projections, 2230 limit=limit, 2231 ) 2232 this.comments = comments 2233 2234 into = self._parse_into() 2235 if into: 2236 this.set("into", into) 2237 2238 if not from_: 2239 from_ = self._parse_from() 2240 2241 if from_: 2242 
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(<columns>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional [AS] alias plus an optional column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Not a column list after all: rewind past the opening paren.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals, and trailing clause modifiers (via
        QUERY_MODIFIER_PARSERS) to `this` when it is a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT <offset>, <count> form carries the offset inside
                            # the limit node; hoist it into its own clause.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

def _parse_hint(self) -> t.Optional[exp.Hint]: 2369 if self._match(TokenType.HINT): 2370 hints = [] 2371 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2372 hints.extend(hint) 2373 2374 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2375 self.raise_error("Expected */ after HINT") 2376 2377 return self.expression(exp.Hint, expressions=hints) 2378 2379 return None 2380 2381 def _parse_into(self) -> t.Optional[exp.Into]: 2382 if not self._match(TokenType.INTO): 2383 return None 2384 2385 temp = self._match(TokenType.TEMPORARY) 2386 unlogged = self._match_text_seq("UNLOGGED") 2387 self._match(TokenType.TABLE) 2388 2389 return self.expression( 2390 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2391 ) 2392 2393 def _parse_from( 2394 self, joins: bool = False, skip_from_token: bool = False 2395 ) -> t.Optional[exp.From]: 2396 if not skip_from_token and not self._match(TokenType.FROM): 2397 return None 2398 2399 return self.expression( 2400 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2401 ) 2402 2403 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2404 if not self._match(TokenType.MATCH_RECOGNIZE): 2405 return None 2406 2407 self._match_l_paren() 2408 2409 partition = self._parse_partition_by() 2410 order = self._parse_order() 2411 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2412 2413 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2414 rows = exp.var("ONE ROW PER MATCH") 2415 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2416 text = "ALL ROWS PER MATCH" 2417 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2418 text += f" SHOW EMPTY MATCHES" 2419 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2420 text += f" OMIT EMPTY MATCHES" 2421 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2422 text += f" WITH UNMATCHED ROWS" 2423 rows = exp.var(text) 2424 else: 2425 rows = None 2426 
2427 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2428 text = "AFTER MATCH SKIP" 2429 if self._match_text_seq("PAST", "LAST", "ROW"): 2430 text += f" PAST LAST ROW" 2431 elif self._match_text_seq("TO", "NEXT", "ROW"): 2432 text += f" TO NEXT ROW" 2433 elif self._match_text_seq("TO", "FIRST"): 2434 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2435 elif self._match_text_seq("TO", "LAST"): 2436 text += f" TO LAST {self._advance_any().text}" # type: ignore 2437 after = exp.var(text) 2438 else: 2439 after = None 2440 2441 if self._match_text_seq("PATTERN"): 2442 self._match_l_paren() 2443 2444 if not self._curr: 2445 self.raise_error("Expecting )", self._curr) 2446 2447 paren = 1 2448 start = self._curr 2449 2450 while self._curr and paren > 0: 2451 if self._curr.token_type == TokenType.L_PAREN: 2452 paren += 1 2453 if self._curr.token_type == TokenType.R_PAREN: 2454 paren -= 1 2455 2456 end = self._prev 2457 self._advance() 2458 2459 if paren > 0: 2460 self.raise_error("Expecting )", self._curr) 2461 2462 pattern = exp.var(self._find_sql(start, end)) 2463 else: 2464 pattern = None 2465 2466 define = ( 2467 self._parse_csv( 2468 lambda: self.expression( 2469 exp.Alias, 2470 alias=self._parse_id_var(any_token=True), 2471 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2472 ) 2473 ) 2474 if self._match_text_seq("DEFINE") 2475 else None 2476 ) 2477 2478 self._match_r_paren() 2479 2480 return self.expression( 2481 exp.MatchRecognize, 2482 partition_by=partition, 2483 order=order, 2484 measures=measures, 2485 rows=rows, 2486 after=after, 2487 pattern=pattern, 2488 define=define, 2489 alias=self._parse_table_alias(), 2490 ) 2491 2492 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2493 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2494 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2495 2496 if outer_apply or cross_apply: 2497 this = self._parse_select(table=True) 2498 view = None 2499 outer = 
not cross_apply 2500 elif self._match(TokenType.LATERAL): 2501 this = self._parse_select(table=True) 2502 view = self._match(TokenType.VIEW) 2503 outer = self._match(TokenType.OUTER) 2504 else: 2505 return None 2506 2507 if not this: 2508 this = ( 2509 self._parse_unnest() 2510 or self._parse_function() 2511 or self._parse_id_var(any_token=False) 2512 ) 2513 2514 while self._match(TokenType.DOT): 2515 this = exp.Dot( 2516 this=this, 2517 expression=self._parse_function() or self._parse_id_var(any_token=False), 2518 ) 2519 2520 if view: 2521 table = self._parse_id_var(any_token=False) 2522 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2523 table_alias: t.Optional[exp.TableAlias] = self.expression( 2524 exp.TableAlias, this=table, columns=columns 2525 ) 2526 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2527 # We move the alias from the lateral's child node to the lateral itself 2528 table_alias = this.args["alias"].pop() 2529 else: 2530 table_alias = self._parse_table_alias() 2531 2532 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2533 2534 def _parse_join_parts( 2535 self, 2536 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2537 return ( 2538 self._match_set(self.JOIN_METHODS) and self._prev, 2539 self._match_set(self.JOIN_SIDES) and self._prev, 2540 self._match_set(self.JOIN_KINDS) and self._prev, 2541 ) 2542 2543 def _parse_join( 2544 self, skip_join_token: bool = False, parse_bracket: bool = False 2545 ) -> t.Optional[exp.Join]: 2546 if self._match(TokenType.COMMA): 2547 return self.expression(exp.Join, this=self._parse_table()) 2548 2549 index = self._index 2550 method, side, kind = self._parse_join_parts() 2551 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2552 join = self._match(TokenType.JOIN) 2553 2554 if not skip_join_token and not join: 2555 self._retreat(index) 2556 kind = None 2557 method = None 2558 
side = None 2559 2560 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2561 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2562 2563 if not skip_join_token and not join and not outer_apply and not cross_apply: 2564 return None 2565 2566 if outer_apply: 2567 side = Token(TokenType.LEFT, "LEFT") 2568 2569 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2570 2571 if method: 2572 kwargs["method"] = method.text 2573 if side: 2574 kwargs["side"] = side.text 2575 if kind: 2576 kwargs["kind"] = kind.text 2577 if hint: 2578 kwargs["hint"] = hint 2579 2580 if self._match(TokenType.ON): 2581 kwargs["on"] = self._parse_conjunction() 2582 elif self._match(TokenType.USING): 2583 kwargs["using"] = self._parse_wrapped_id_vars() 2584 elif not (kind and kind.token_type == TokenType.CROSS): 2585 index = self._index 2586 join = self._parse_join() 2587 2588 if join and self._match(TokenType.ON): 2589 kwargs["on"] = self._parse_conjunction() 2590 elif join and self._match(TokenType.USING): 2591 kwargs["using"] = self._parse_wrapped_id_vars() 2592 else: 2593 join = None 2594 self._retreat(index) 2595 2596 kwargs["this"].set("joins", [join] if join else None) 2597 2598 comments = [c for token in (method, side, kind) if token for c in token.comments] 2599 return self.expression(exp.Join, comments=comments, **kwargs) 2600 2601 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2602 this = self._parse_conjunction() 2603 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2604 return this 2605 2606 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2607 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2608 2609 return this 2610 2611 def _parse_index( 2612 self, 2613 index: t.Optional[exp.Expression] = None, 2614 ) -> t.Optional[exp.Index]: 2615 if index: 2616 unique = None 2617 primary = None 2618 amp = None 2619 2620 
self._match(TokenType.ON) 2621 self._match(TokenType.TABLE) # hive 2622 table = self._parse_table_parts(schema=True) 2623 else: 2624 unique = self._match(TokenType.UNIQUE) 2625 primary = self._match_text_seq("PRIMARY") 2626 amp = self._match_text_seq("AMP") 2627 2628 if not self._match(TokenType.INDEX): 2629 return None 2630 2631 index = self._parse_id_var() 2632 table = None 2633 2634 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2635 2636 if self._match(TokenType.L_PAREN, advance=False): 2637 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2638 else: 2639 columns = None 2640 2641 return self.expression( 2642 exp.Index, 2643 this=index, 2644 table=table, 2645 using=using, 2646 columns=columns, 2647 unique=unique, 2648 primary=primary, 2649 amp=amp, 2650 partition_by=self._parse_partition_by(), 2651 where=self._parse_where(), 2652 ) 2653 2654 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2655 hints: t.List[exp.Expression] = [] 2656 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2657 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2658 hints.append( 2659 self.expression( 2660 exp.WithTableHint, 2661 expressions=self._parse_csv( 2662 lambda: self._parse_function() or self._parse_var(any_token=True) 2663 ), 2664 ) 2665 ) 2666 self._match_r_paren() 2667 else: 2668 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2669 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2670 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2671 2672 self._match_texts(("INDEX", "KEY")) 2673 if self._match(TokenType.FOR): 2674 hint.set("target", self._advance_any() and self._prev.text.upper()) 2675 2676 hint.set("expressions", self._parse_wrapped_id_vars()) 2677 hints.append(hint) 2678 2679 return hints or None 2680 2681 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2682 return ( 2683 
(not schema and self._parse_function(optional_parens=False)) 2684 or self._parse_id_var(any_token=False) 2685 or self._parse_string_as_identifier() 2686 or self._parse_placeholder() 2687 ) 2688 2689 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2690 catalog = None 2691 db = None 2692 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2693 2694 while self._match(TokenType.DOT): 2695 if catalog: 2696 # This allows nesting the table in arbitrarily many dot expressions if needed 2697 table = self.expression( 2698 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2699 ) 2700 else: 2701 catalog = db 2702 db = table 2703 table = self._parse_table_part(schema=schema) or "" 2704 2705 if not table: 2706 self.raise_error(f"Expected table name but got {self._curr}") 2707 2708 return self.expression( 2709 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2710 ) 2711 2712 def _parse_table( 2713 self, 2714 schema: bool = False, 2715 joins: bool = False, 2716 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2717 parse_bracket: bool = False, 2718 ) -> t.Optional[exp.Expression]: 2719 lateral = self._parse_lateral() 2720 if lateral: 2721 return lateral 2722 2723 unnest = self._parse_unnest() 2724 if unnest: 2725 return unnest 2726 2727 values = self._parse_derived_table_values() 2728 if values: 2729 return values 2730 2731 subquery = self._parse_select(table=True) 2732 if subquery: 2733 if not subquery.args.get("pivots"): 2734 subquery.set("pivots", self._parse_pivots()) 2735 return subquery 2736 2737 bracket = parse_bracket and self._parse_bracket(None) 2738 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2739 this = t.cast( 2740 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2741 ) 2742 2743 if schema: 2744 return self._parse_schema(this=this) 2745 2746 version = self._parse_version() 2747 2748 if version: 2749 
this.set("version", version) 2750 2751 if self.dialect.ALIAS_POST_TABLESAMPLE: 2752 table_sample = self._parse_table_sample() 2753 2754 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2755 if alias: 2756 this.set("alias", alias) 2757 2758 if self._match_text_seq("AT"): 2759 this.set("index", self._parse_id_var()) 2760 2761 this.set("hints", self._parse_table_hints()) 2762 2763 if not this.args.get("pivots"): 2764 this.set("pivots", self._parse_pivots()) 2765 2766 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2767 table_sample = self._parse_table_sample() 2768 2769 if table_sample: 2770 table_sample.set("this", this) 2771 this = table_sample 2772 2773 if joins: 2774 for join in iter(self._parse_join, None): 2775 this.append("joins", join) 2776 2777 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2778 this.set("ordinality", True) 2779 this.set("alias", self._parse_table_alias()) 2780 2781 return this 2782 2783 def _parse_version(self) -> t.Optional[exp.Version]: 2784 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2785 this = "TIMESTAMP" 2786 elif self._match(TokenType.VERSION_SNAPSHOT): 2787 this = "VERSION" 2788 else: 2789 return None 2790 2791 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2792 kind = self._prev.text.upper() 2793 start = self._parse_bitwise() 2794 self._match_texts(("TO", "AND")) 2795 end = self._parse_bitwise() 2796 expression: t.Optional[exp.Expression] = self.expression( 2797 exp.Tuple, expressions=[start, end] 2798 ) 2799 elif self._match_text_seq("CONTAINED", "IN"): 2800 kind = "CONTAINED IN" 2801 expression = self.expression( 2802 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2803 ) 2804 elif self._match(TokenType.ALL): 2805 kind = "ALL" 2806 expression = None 2807 else: 2808 self._match_text_seq("AS", "OF") 2809 kind = "AS OF" 2810 expression = self._parse_type() 2811 2812 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2813 2814 
def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2815 if not self._match(TokenType.UNNEST): 2816 return None 2817 2818 expressions = self._parse_wrapped_csv(self._parse_equality) 2819 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2820 2821 alias = self._parse_table_alias() if with_alias else None 2822 2823 if alias: 2824 if self.dialect.UNNEST_COLUMN_ONLY: 2825 if alias.args.get("columns"): 2826 self.raise_error("Unexpected extra column alias in unnest.") 2827 2828 alias.set("columns", [alias.this]) 2829 alias.set("this", None) 2830 2831 columns = alias.args.get("columns") or [] 2832 if offset and len(expressions) < len(columns): 2833 offset = columns.pop() 2834 2835 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2836 self._match(TokenType.ALIAS) 2837 offset = self._parse_id_var( 2838 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2839 ) or exp.to_identifier("offset") 2840 2841 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2842 2843 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2844 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2845 if not is_derived and not self._match(TokenType.VALUES): 2846 return None 2847 2848 expressions = self._parse_csv(self._parse_value) 2849 alias = self._parse_table_alias() 2850 2851 if is_derived: 2852 self._match_r_paren() 2853 2854 return self.expression( 2855 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2856 ) 2857 2858 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2859 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2860 as_modifier and self._match_text_seq("USING", "SAMPLE") 2861 ): 2862 return None 2863 2864 bucket_numerator = None 2865 bucket_denominator = None 2866 bucket_field = None 2867 percent = None 2868 rows = None 2869 size = None 2870 seed = None 2871 2872 kind = ( 2873 self._prev.text 
if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2874 ) 2875 method = self._parse_var(tokens=(TokenType.ROW,)) 2876 2877 matched_l_paren = self._match(TokenType.L_PAREN) 2878 2879 if self.TABLESAMPLE_CSV: 2880 num = None 2881 expressions = self._parse_csv(self._parse_primary) 2882 else: 2883 expressions = None 2884 num = ( 2885 self._parse_factor() 2886 if self._match(TokenType.NUMBER, advance=False) 2887 else self._parse_primary() or self._parse_placeholder() 2888 ) 2889 2890 if self._match_text_seq("BUCKET"): 2891 bucket_numerator = self._parse_number() 2892 self._match_text_seq("OUT", "OF") 2893 bucket_denominator = bucket_denominator = self._parse_number() 2894 self._match(TokenType.ON) 2895 bucket_field = self._parse_field() 2896 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2897 percent = num 2898 elif self._match(TokenType.ROWS): 2899 rows = num 2900 elif num: 2901 size = num 2902 2903 if matched_l_paren: 2904 self._match_r_paren() 2905 2906 if self._match(TokenType.L_PAREN): 2907 method = self._parse_var() 2908 seed = self._match(TokenType.COMMA) and self._parse_number() 2909 self._match_r_paren() 2910 elif self._match_texts(("SEED", "REPEATABLE")): 2911 seed = self._parse_wrapped(self._parse_number) 2912 2913 return self.expression( 2914 exp.TableSample, 2915 expressions=expressions, 2916 method=method, 2917 bucket_numerator=bucket_numerator, 2918 bucket_denominator=bucket_denominator, 2919 bucket_field=bucket_field, 2920 percent=percent, 2921 rows=rows, 2922 size=size, 2923 seed=seed, 2924 kind=kind, 2925 ) 2926 2927 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2928 return list(iter(self._parse_pivot, None)) or None 2929 2930 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2931 return list(iter(self._parse_join, None)) or None 2932 2933 # https://duckdb.org/docs/sql/statements/pivot 2934 def _parse_simplified_pivot(self) -> exp.Pivot: 2935 def _parse_on() -> t.Optional[exp.Expression]: 2936 this = 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT clause; returns None (after rewinding) if not one."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        # Without an opening paren this wasn't a pivot clause after all: rewind.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # PIVOT takes (optionally aliased) aggregate calls; UNPIVOT takes columns.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the pivot's output column names: one per combination of
            # an IN-list value and an aggregation alias.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect setting controls whether the alias prefixes or
                    # suffixes the field value in the generated column name.
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
self._parse_wrapped_csv(self._parse_column) 3063 elements["cube"].extend(ensure_list(cube)) 3064 3065 if self._match_text_seq("TOTALS"): 3066 totals = True 3067 elements["totals"] = True # type: ignore 3068 3069 if not (grouping_sets or rollup or cube or totals): 3070 if with_: 3071 self._retreat(index) 3072 break 3073 3074 return self.expression(exp.Group, **elements) # type: ignore 3075 3076 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3077 if not self._match(TokenType.GROUPING_SETS): 3078 return None 3079 3080 return self._parse_wrapped_csv(self._parse_grouping_set) 3081 3082 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3083 if self._match(TokenType.L_PAREN): 3084 grouping_set = self._parse_csv(self._parse_column) 3085 self._match_r_paren() 3086 return self.expression(exp.Tuple, expressions=grouping_set) 3087 3088 return self._parse_column() 3089 3090 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3091 if not skip_having_token and not self._match(TokenType.HAVING): 3092 return None 3093 return self.expression(exp.Having, this=self._parse_conjunction()) 3094 3095 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3096 if not self._match(TokenType.QUALIFY): 3097 return None 3098 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3099 3100 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3101 if skip_start_token: 3102 start = None 3103 elif self._match(TokenType.START_WITH): 3104 start = self._parse_conjunction() 3105 else: 3106 return None 3107 3108 self._match(TokenType.CONNECT_BY) 3109 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3110 exp.Prior, this=self._parse_bitwise() 3111 ) 3112 connect = self._parse_conjunction() 3113 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3114 3115 if not start and self._match(TokenType.START_WITH): 3116 start = self._parse_conjunction() 3117 3118 return 
    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ORDER BY term: expression [ASC|DESC] [NULLS FIRST|LAST]."""
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        # desc is tri-state: truthy for explicit DESC, False for explicit ASC
        # (that's what `asc and False` produces), and falsy-None when neither
        # direction was spelled out.
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When NULLS FIRST/LAST wasn't given explicitly, derive it from the
        # dialect's default null ordering so the ordering stays stable on
        # transpilation.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
self._parse_term() 3172 3173 if self._match(TokenType.COMMA): 3174 offset = expression 3175 expression = self._parse_term() 3176 else: 3177 offset = None 3178 3179 limit_exp = self.expression( 3180 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3181 ) 3182 3183 return limit_exp 3184 3185 if self._match(TokenType.FETCH): 3186 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3187 direction = self._prev.text if direction else "FIRST" 3188 3189 count = self._parse_field(tokens=self.FETCH_TOKENS) 3190 percent = self._match(TokenType.PERCENT) 3191 3192 self._match_set((TokenType.ROW, TokenType.ROWS)) 3193 3194 only = self._match_text_seq("ONLY") 3195 with_ties = self._match_text_seq("WITH", "TIES") 3196 3197 if only and with_ties: 3198 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3199 3200 return self.expression( 3201 exp.Fetch, 3202 direction=direction, 3203 count=count, 3204 percent=percent, 3205 with_ties=with_ties, 3206 ) 3207 3208 return this 3209 3210 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3211 if not self._match(TokenType.OFFSET): 3212 return this 3213 3214 count = self._parse_term() 3215 self._match_set((TokenType.ROW, TokenType.ROWS)) 3216 return self.expression(exp.Offset, this=this, expression=count) 3217 3218 def _parse_locks(self) -> t.List[exp.Lock]: 3219 locks = [] 3220 while True: 3221 if self._match_text_seq("FOR", "UPDATE"): 3222 update = True 3223 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3224 "LOCK", "IN", "SHARE", "MODE" 3225 ): 3226 update = False 3227 else: 3228 break 3229 3230 expressions = None 3231 if self._match_text_seq("OF"): 3232 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3233 3234 wait: t.Optional[bool | exp.Expression] = None 3235 if self._match_text_seq("NOWAIT"): 3236 wait = True 3237 elif self._match_text_seq("WAIT"): 3238 wait = self._parse_primary() 3239 
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT clauses onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is the default unless ALL is given explicitly.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        # For dialects where trailing modifiers (the args named in
        # UNION_MODIFIERS) belong to the union itself rather than the last
        # SELECT, hoist them from the right-hand side up onto the union node.
        if this and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
expression = self.RANGE_PARSERS[self._prev.token_type](self, this)

        if not expression:
            return this

        this = expression
    elif self._match(TokenType.ISNULL):
        this = self.expression(exp.Is, this=this, expression=exp.Null())

    # Postgres supports ISNULL and NOTNULL for conditions.
    # https://blog.andreiavram.ro/postgresql-null-composite-type/
    if self._match(TokenType.NOTNULL):
        this = self.expression(exp.Is, this=this, expression=exp.Null())
        this = self.expression(exp.Not, this=this)

    if negate:
        # A preceding NOT wraps the whole range predicate.
        this = self.expression(exp.Not, this=this)

    if self._match(TokenType.IS):
        this = self._parse_is(this)

    return this

def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # Parse the predicate after IS: [NOT] DISTINCT FROM, or NULL / a boolean literal.
    index = self._index - 1
    negate = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS [NOT] DISTINCT FROM maps to the null-safe (in)equality nodes.
        klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
        return self.expression(klass, this=this, expression=self._parse_conjunction())

    expression = self._parse_null() or self._parse_boolean()
    if not expression:
        # Not actually an IS-predicate; rewind to before the IS token.
        self._retreat(index)
        return None

    this = self.expression(exp.Is, this=this, expression=expression)
    return self.expression(exp.Not, this=this) if negate else this

def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
    # IN accepts UNNEST(...), a parenthesized/bracketed list or subquery, or a bare field.
    unnest = self._parse_unnest(with_alias=False)
    if unnest:
        this = self.expression(exp.In, this=this, unnest=unnest)
    elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
        matched_l_paren = self._prev.token_type == TokenType.L_PAREN
        expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

        if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
            # A single subquery becomes `query`, not an expression list.
            this = self.expression(exp.In, this=this, query=expressions[0])
        else:
            this = self.expression(exp.In, this=this, expressions=expressions)

        if matched_l_paren:
            self._match_r_paren(this)
        elif not self._match(TokenType.R_BRACKET, expression=this):
            self.raise_error("Expecting ]")
    else:
        this = self.expression(exp.In, this=this, field=self._parse_field())

    return this

def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
    # BETWEEN <low> AND <high>; AND is matched leniently (not required).
    low = self._parse_bitwise()
    self._match(TokenType.AND)
    high = self._parse_bitwise()
    return self.expression(exp.Between, this=this, low=low, high=high)

def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # Optional ESCAPE '<char>' suffix (e.g. after LIKE).
    if not self._match(TokenType.ESCAPE):
        return this
    return self.expression(exp.Escape, this=this, expression=self._parse_string())

def _parse_interval(self) -> t.Optional[exp.Interval]:
    index = self._index

    if not self._match(TokenType.INTERVAL):
        return None

    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    if not this:
        self._retreat(index)
        return None

    unit = self._parse_function() or self._parse_var(any_token=True)

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.name)
    elif this and this.is_string:
        parts = this.name.split()

        if len(parts) == 2:
            if unit:
                # This is not actually a unit, it's something else (e.g. a "window side")
                unit = None
                self._retreat(self._index - 1)

            this = exp.Literal.string(parts[0])
            unit = self.expression(exp.Var, this=parts[1])

    return self.expression(exp.Interval, this=this, unit=unit)

def _parse_bitwise(self) -> t.Optional[exp.Expression]:
    # Left-associative loop over bitwise operators, string-concat ||, ?? and shifts.
    this = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            this = self.expression(
                self.BITWISE[self._prev.token_type],
                this=this,
                expression=self._parse_term(),
            )
        elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
            this = self.expression(
                exp.DPipe,
                this=this,
                expression=self._parse_term(),
                safe=not self.dialect.STRICT_STRING_CONCAT,
            )
        elif self._match(TokenType.DQMARK):
            # `a ?? b` null-coalescing operator.
            this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
        elif self._match_pair(TokenType.LT, TokenType.LT):
            this = self.expression(
                exp.BitwiseLeftShift, this=this, expression=self._parse_term()
            )
        elif self._match_pair(TokenType.GT, TokenType.GT):
            this = self.expression(
                exp.BitwiseRightShift, this=this, expression=self._parse_term()
            )
        else:
            break

    return this

def _parse_term(self) -> t.Optional[exp.Expression]:
    # Additive-level operators (self.TERM) over factors.
    return self._parse_tokens(self._parse_factor, self.TERM)

def _parse_factor(self) -> t.Optional[exp.Expression]:
    # Multiplicative-level operators; exponentiation binds tighter when supported.
    if self.EXPONENT:
        factor = self._parse_tokens(self._parse_exponent, self.FACTOR)
    else:
        factor = self._parse_tokens(self._parse_unary, self.FACTOR)
    if isinstance(factor, exp.Div):
        # Record dialect division semantics on the node for transpilation.
        factor.args["typed"] = self.dialect.TYPED_DIVISION
        factor.args["safe"] = self.dialect.SAFE_DIVISION
    return factor

def _parse_exponent(self) -> t.Optional[exp.Expression]:
    return self._parse_tokens(self._parse_unary, self.EXPONENT)

def _parse_unary(self) -> t.Optional[exp.Expression]:
    if
self._match_set(self.UNARY_PARSERS):
        return self.UNARY_PARSERS[self._prev.token_type](self)
    return self._parse_at_time_zone(self._parse_type())

def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
    # Try INTERVAL first, then a cast-like "<type> <literal>" form, else a column.
    interval = parse_interval and self._parse_interval()
    if interval:
        return interval

    index = self._index
    data_type = self._parse_types(check_func=True, allow_identifiers=False)
    this = self._parse_column()

    if data_type:
        if isinstance(this, exp.Literal):
            parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
            if parser:
                return parser(self, this, data_type)
            return self.expression(exp.Cast, this=this, to=data_type)
        if not data_type.expressions:
            # Bare type name followed by a non-literal: treat it as a column instead.
            self._retreat(index)
            return self._parse_column()
        return self._parse_column_ops(data_type)

    return this and self._parse_column_ops(this)

def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
    # A type parameter such as a length/precision, optionally followed by a keyword.
    this = self._parse_type()
    if not this:
        return None

    return self.expression(
        exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
    )

def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
    # Parse a (possibly nested/parameterized) data type; returns None and rewinds on failure.
    index = self._index

    prefix = self._match_text_seq("SYSUDTLIB", ".")

    if not self._match_set(self.TYPE_TOKENS):
        # Not a known type token; maybe an identifier that re-tokenizes to one, or a UDT.
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )
        if identifier:
            tokens = self.dialect.tokenize(identifier.name)

            if len(tokens) != 1:
                self.raise_error("Unexpected identifier", self._prev)

            if tokens[0].token_type in self.TYPE_TOKENS:
                self._prev = tokens[0]
            elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                type_name = identifier.name

                while self._match(TokenType.DOT):
                    type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                return exp.DataType.build(type_name, udt=True)
            else:
                return None
        else:
            return None

    type_token = self._prev.token_type

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType, this=self._prev.text)

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier, this=self._prev.text)

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    expressions = None
    maybe_func = False

    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_types)
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
        elif type_token in self.ENUM_TYPE_TOKENS:
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = self._parse_csv(self._parse_type_size)

        if not expressions or not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        # Could still be a function call with this name, e.g. DATE(...); checked below.
        maybe_func = True

    this: t.Optional[exp.Expression] = None
    values: t.Optional[t.List[exp.Expression]] = None

    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_types)
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_conjunction)
            self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = (
                exp.DataType.Type.TIMETZ
                if type_token in self.TIMES
                else exp.DataType.Type.TIMESTAMPTZ
            )
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        unit = self._parse_var()

        if self._match_text_seq("TO"):
            # INTERVAL <unit> TO <unit> range, e.g. INTERVAL DAY TO SECOND.
            span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
        else:
            span = None

        if span or not unit:
            this = self.expression(
                exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
            )
        else:
            this = self.expression(exp.Interval, unit=unit)

    if maybe_func and check_func:
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            # No trailing string literal, so this was a function call, not a type.
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

            type_token = unsigned_type_token or type_token

        this = exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    # Trailing [] pairs wrap the type in ARRAY, once per pair.
    while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
        this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

    return this

def _parse_struct_types(self) -> t.Optional[exp.Expression]:
    # One STRUCT member: `name type` or `name: type` (the colon is optional).
    this = self._parse_type(parse_interval=False) or self._parse_id_var()
    self._match(TokenType.COLON)
    return self._parse_column_def(this)
def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # Optional `AT TIME ZONE <expr>` suffix.
    if not self._match_text_seq("AT", "TIME", "ZONE"):
        return this
    return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

def _parse_column(self) -> t.Optional[exp.Expression]:
    # A column reference, then any trailing column operators (dots, ::, brackets).
    this = self._parse_field()
    if isinstance(this, exp.Identifier):
        this = self.expression(exp.Column, this=this)
    elif not this:
        return self._parse_bracket(this)
    return self._parse_column_ops(this)

def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # Apply chained column operators (e.g. `.`, `::`, JSON arrows) and bracket accesses.
    this = self._parse_bracket(this)

    while self._match_set(self.COLUMN_OPERATORS):
        op_token = self._prev.token_type
        op = self.COLUMN_OPERATORS.get(op_token)

        if op_token == TokenType.DCOLON:
            # `expr::type` cast syntax.
            field = self._parse_types()
            if not field:
                self.raise_error("Expected type")
        elif op and self._curr:
            self._advance()
            value = self._prev.text
            field = (
                exp.Literal.number(value)
                if self._prev.token_type == TokenType.NUMBER
                else exp.Literal.string(value)
            )
        else:
            field = self._parse_field(anonymous_func=True, any_token=True)

        if isinstance(field, exp.Func):
            # bigquery allows function calls like x.y.count(...)
            # SAFE.SUBSTR(...)
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            this = self._replace_columns_with_dots(this)

        if op:
            this = op(self, this, field)
        elif isinstance(this, exp.Column) and not this.args.get("catalog"):
            # Shift the qualifier chain: column -> table -> db -> catalog.
            this = self.expression(
                exp.Column,
                this=field,
                table=this.this,
                db=this.args.get("table"),
                catalog=this.args.get("db"),
            )
        else:
            this = self.expression(exp.Dot, this=this, expression=field)

        this = self._parse_bracket(this)
    return this

def _parse_primary(self) -> t.Optional[exp.Expression]:
    # Literals, implicit string concatenation, `.5`-style numbers, and parenthesized
    # expressions / subqueries / tuples.
    if self._match_set(self.PRIMARY_PARSERS):
        token_type = self._prev.token_type
        primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

        if token_type == TokenType.STRING:
            # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
            expressions = [primary]
            while self._match(TokenType.STRING):
                expressions.append(exp.Literal.string(self._prev.text))

            if len(expressions) > 1:
                return self.expression(exp.Concat, expressions=expressions)

        return primary

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if self._match(TokenType.L_PAREN):
        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if isinstance(this, exp.Subqueryable):
            this = self._parse_set_operations(
                self._parse_subquery(this=this, parse_alias=False)
            )
        elif len(expressions) > 1:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=self._parse_set_operations(this))

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    return None

def _parse_field(
    self,
    any_token: bool = False,
    tokens: t.Optional[t.Collection[TokenType]] = None,
    anonymous_func: bool = False,
) -> t.Optional[exp.Expression]:
    # Primary literal, then function call, then identifier — first match wins.
    return (
        self._parse_primary()
        or self._parse_function(anonymous=anonymous_func)
        or self._parse_id_var(any_token=any_token, tokens=tokens)
    )

def _parse_function(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
) -> t.Optional[exp.Expression]:
    # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
    # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
    fn_syntax = False
    if (
        self._match(TokenType.L_BRACE, advance=False)
        and self._next
        and self._next.text.upper() == "FN"
    ):
        self._advance(2)
        fn_syntax = True

    func = self._parse_function_call(
        functions=functions, anonymous=anonymous, optional_parens=optional_parens
    )

    if fn_syntax:
        self._match(TokenType.R_BRACE)

    return func

def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
) -> t.Optional[exp.Expression]:
    if not self._curr:
        return None

    comments = self._curr.comments
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    # Functions that are parsed without parentheses (e.g. special dialect forms).
    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return parser(self)

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if token_type not in
self.FUNC_TOKENS:
        return None

    # Skip over the function name and the opening parenthesis.
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
            # e.g. EXISTS(SELECT ...) / ANY(SELECT ...).
            this = self.expression(subquery_predicate, this=self._parse_select())
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)

        alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        if function and not anonymous:
            # Some registered builders accept the dialect as a keyword argument.
            if "dialect" in function.__code__.co_varnames:
                func = function(args, dialect=self.dialect)
            else:
                func = function(args)

            func = self.validate_expression(func, args)
            if not self.dialect.NORMALIZE_FUNCTIONS:
                # Preserve the original casing of the function name.
                func.meta["name"] = this

            this = func
        else:
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)

def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
    # A UDF parameter is an identifier with an optional column-def style type.
    return self._parse_column_def(self._parse_id_var())

def _parse_user_defined_function(
    self, kind: t.Optional[TokenType] = None
) -> t.Optional[exp.Expression]:
    # A possibly dot-qualified name, optionally followed by a parameter list.
    this = self._parse_id_var()

    while self._match(TokenType.DOT):
        this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

    if not self._match(TokenType.L_PAREN):
        return this

    expressions = self._parse_csv(self._parse_function_parameter)
    self._match_r_paren()
    return self.expression(
        exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
    )

def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
    # e.g. MySQL's `_utf8'abc'` charset introducer; plain identifier if no literal follows.
    literal = self._parse_primary()
    if literal:
        return self.expression(exp.Introducer, this=token.text, expression=literal)

    return self.expression(exp.Identifier, this=token.text)

def _parse_session_parameter(self) -> exp.SessionParameter:
    # `name` or `kind.name` session parameter reference.
    kind = None
    this = self._parse_id_var() or self._parse_primary()

    if this and self._match(TokenType.DOT):
        kind = this.name
        this = self._parse_var() or self._parse_primary()

    return self.expression(exp.SessionParameter, this=this, kind=kind)

def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    # Try `(a, b) -> ...` / `a -> ...` lambda syntax; otherwise fall back to an
    # ordinary expression (or DISTINCT list) argument.
    index = self._index

    if self._match(TokenType.L_PAREN):
        expressions = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
        )

        if not self._match(TokenType.R_PAREN):
            self._retreat(index)
    else:
        expressions = [self._parse_id_var()]

    if self._match_set(self.LAMBDAS):
        return self.LAMBDAS[self._prev.token_type](self, expressions)

    self._retreat(index)

    this: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        this = self.expression(
            exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
        )
    else:
        this = self._parse_select_or_expression(alias=alias)

    return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    # A parenthesized schema: column defs and/or constraints; bails out if the
    # parenthesized content is actually a SELECT.
    index = self._index

    if not self.errors:
        try:
            if self._parse_select(nested=True):
                return this
        except ParseError:
            pass
        finally:
            # Discard any speculative errors and rewind the probe.
            self.errors.clear()
            self._retreat(index)

    if not self._match(TokenType.L_PAREN):
        return this

    args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

    self._match_r_paren()
    return self.expression(exp.Schema, this=this, expressions=args)

def _parse_field_def(self) -> t.Optional[exp.Expression]:
    return self._parse_column_def(self._parse_field(any_token=True))

def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    if not kind and self._match(TokenType.ALIAS):
        # Untyped `name AS <expr>` computed column.
        constraints.append(
            self.expression(
                exp.ComputedColumnConstraint,
                this=self._parse_conjunction(),
                persisted=self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
        )
    elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
        self._match(TokenType.ALIAS)
        constraints.append(
            self.expression(exp.TransformColumnConstraint, this=self._parse_field())
        )

    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    # AUTO_INCREMENT, optionally with (start, increment) or START ... INCREMENT ...
    start = None
    increment = None

    if self._match(TokenType.L_PAREN, advance=False):
        args = self._parse_wrapped_csv(self._parse_bitwise)
        start = seq_get(args, 0)
        increment = seq_get(args, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
self._match_text_seq("INCREMENT") 3992 increment = self._parse_bitwise() 3993 3994 if start and increment: 3995 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3996 3997 return exp.AutoIncrementColumnConstraint() 3998 3999 def _parse_compress(self) -> exp.CompressColumnConstraint: 4000 if self._match(TokenType.L_PAREN, advance=False): 4001 return self.expression( 4002 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4003 ) 4004 4005 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4006 4007 def _parse_generated_as_identity( 4008 self, 4009 ) -> ( 4010 exp.GeneratedAsIdentityColumnConstraint 4011 | exp.ComputedColumnConstraint 4012 | exp.GeneratedAsRowColumnConstraint 4013 ): 4014 if self._match_text_seq("BY", "DEFAULT"): 4015 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4016 this = self.expression( 4017 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4018 ) 4019 else: 4020 self._match_text_seq("ALWAYS") 4021 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4022 4023 self._match(TokenType.ALIAS) 4024 4025 if self._match_text_seq("ROW"): 4026 start = self._match_text_seq("START") 4027 if not start: 4028 self._match(TokenType.END) 4029 hidden = self._match_text_seq("HIDDEN") 4030 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4031 4032 identity = self._match_text_seq("IDENTITY") 4033 4034 if self._match(TokenType.L_PAREN): 4035 if self._match(TokenType.START_WITH): 4036 this.set("start", self._parse_bitwise()) 4037 if self._match_text_seq("INCREMENT", "BY"): 4038 this.set("increment", self._parse_bitwise()) 4039 if self._match_text_seq("MINVALUE"): 4040 this.set("minvalue", self._parse_bitwise()) 4041 if self._match_text_seq("MAXVALUE"): 4042 this.set("maxvalue", self._parse_bitwise()) 4043 4044 if self._match_text_seq("CYCLE"): 4045 this.set("cycle", True) 4046 elif 
self._match_text_seq("NO", "CYCLE"): 4047 this.set("cycle", False) 4048 4049 if not identity: 4050 this.set("expression", self._parse_bitwise()) 4051 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4052 args = self._parse_csv(self._parse_bitwise) 4053 this.set("start", seq_get(args, 0)) 4054 this.set("increment", seq_get(args, 1)) 4055 4056 self._match_r_paren() 4057 4058 return this 4059 4060 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4061 self._match_text_seq("LENGTH") 4062 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4063 4064 def _parse_not_constraint( 4065 self, 4066 ) -> t.Optional[exp.Expression]: 4067 if self._match_text_seq("NULL"): 4068 return self.expression(exp.NotNullColumnConstraint) 4069 if self._match_text_seq("CASESPECIFIC"): 4070 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4071 if self._match_text_seq("FOR", "REPLICATION"): 4072 return self.expression(exp.NotForReplicationColumnConstraint) 4073 return None 4074 4075 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4076 if self._match(TokenType.CONSTRAINT): 4077 this = self._parse_id_var() 4078 else: 4079 this = None 4080 4081 if self._match_texts(self.CONSTRAINT_PARSERS): 4082 return self.expression( 4083 exp.ColumnConstraint, 4084 this=this, 4085 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4086 ) 4087 4088 return this 4089 4090 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4091 if not self._match(TokenType.CONSTRAINT): 4092 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4093 4094 this = self._parse_id_var() 4095 expressions = [] 4096 4097 while True: 4098 constraint = self._parse_unnamed_constraint() or self._parse_function() 4099 if not constraint: 4100 break 4101 expressions.append(constraint) 4102 4103 return self.expression(exp.Constraint, this=this, expressions=expressions) 4104 4105 def 
_parse_unnamed_constraint( 4106 self, constraints: t.Optional[t.Collection[str]] = None 4107 ) -> t.Optional[exp.Expression]: 4108 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4109 constraints or self.CONSTRAINT_PARSERS 4110 ): 4111 return None 4112 4113 constraint = self._prev.text.upper() 4114 if constraint not in self.CONSTRAINT_PARSERS: 4115 self.raise_error(f"No parser found for schema constraint {constraint}.") 4116 4117 return self.CONSTRAINT_PARSERS[constraint](self) 4118 4119 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4120 self._match_text_seq("KEY") 4121 return self.expression( 4122 exp.UniqueColumnConstraint, 4123 this=self._parse_schema(self._parse_id_var(any_token=False)), 4124 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4125 ) 4126 4127 def _parse_key_constraint_options(self) -> t.List[str]: 4128 options = [] 4129 while True: 4130 if not self._curr: 4131 break 4132 4133 if self._match(TokenType.ON): 4134 action = None 4135 on = self._advance_any() and self._prev.text 4136 4137 if self._match_text_seq("NO", "ACTION"): 4138 action = "NO ACTION" 4139 elif self._match_text_seq("CASCADE"): 4140 action = "CASCADE" 4141 elif self._match_text_seq("RESTRICT"): 4142 action = "RESTRICT" 4143 elif self._match_pair(TokenType.SET, TokenType.NULL): 4144 action = "SET NULL" 4145 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4146 action = "SET DEFAULT" 4147 else: 4148 self.raise_error("Invalid key constraint") 4149 4150 options.append(f"ON {on} {action}") 4151 elif self._match_text_seq("NOT", "ENFORCED"): 4152 options.append("NOT ENFORCED") 4153 elif self._match_text_seq("DEFERRABLE"): 4154 options.append("DEFERRABLE") 4155 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4156 options.append("INITIALLY DEFERRED") 4157 elif self._match_text_seq("NORELY"): 4158 options.append("NORELY") 4159 elif self._match_text_seq("MATCH", "FULL"): 4160 options.append("MATCH FULL") 4161 
else:
            # No recognized option keyword: stop collecting.
            break

    return options

def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    # REFERENCES <table> [<key options>]; `match=False` skips the keyword check.
    if match and not self._match(TokenType.REFERENCES):
        return None

    expressions = None
    this = self._parse_table(schema=True)
    options = self._parse_key_constraint_options()
    return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

def _parse_foreign_key(self) -> exp.ForeignKey:
    # FOREIGN KEY (cols) [REFERENCES ...] [ON {DELETE | UPDATE} <action>]...
    expressions = self._parse_wrapped_id_vars()
    reference = self._parse_references()
    options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            action = "SET " + self._prev.text.upper()
        else:
            self._advance()
            action = self._prev.text.upper()

        options[kind] = action

    return self.expression(
        exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
    )

def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    return self._parse_field()

def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
    # PERIOD FOR SYSTEM_TIME (start_col, end_col)
    self._match(TokenType.TIMESTAMP_SNAPSHOT)

    id_vars = self._parse_wrapped_id_vars()
    return self.expression(
        exp.PeriodForSystemTimeConstraint,
        this=seq_get(id_vars, 0),
        expression=seq_get(id_vars, 1),
    )

def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    # Without a column list this is a column-level PRIMARY KEY constraint.
    if not in_props and not self._match(TokenType.L_PAREN, advance=False):
        return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

    expressions = self._parse_wrapped_csv(
        self._parse_primary_key_part, optional=wrapped_optional
    )
    options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # `[...]` subscripts / array literals and `{...}` struct literals; recurses for chains.
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type

    if self._match(TokenType.COLON):
        # Leading colon means a slice with no lower bound.
        expressions: t.List[exp.Expression] = [
            self.expression(exp.Slice, expression=self._parse_conjunction())
        ]
    else:
        expressions = self._parse_csv(
            lambda: self._parse_slice(
                self._parse_alias(self._parse_conjunction(), explicit=True)
            )
        )

    if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
        self.raise_error("Expected ]")
    elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
        self.raise_error("Expected }")

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if bracket_kind == TokenType.L_BRACE:
        this = self.expression(exp.Struct, expressions=expressions)
    elif not this or this.name.upper() == "ARRAY":
        this = self.expression(exp.Array, expressions=expressions)
    else:
        # Normalize subscripts to the dialect's index offset.
        expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    self._add_comments(this)
    return self._parse_bracket(this)

def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    # `lower : upper` inside a bracket access.
    if self._match(TokenType.COLON):
        return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
    return this

def _parse_case(self) -> t.Optional[exp.Expression]:
    # CASE [operand] WHEN ... THEN ... [ELSE ...] END, optionally windowed.
    ifs = []
    default = None

    comments = self._prev_comments
    expression = self._parse_conjunction()

    while self._match(TokenType.WHEN):
        this = self._parse_conjunction()
        self._match(TokenType.THEN)
        then = self._parse_conjunction()
        ifs.append(self.expression(exp.If, this=this, true=then))

    if self._match(TokenType.ELSE):
        default = self._parse_conjunction()

    if not self._match(TokenType.END):
        self.raise_error("Expected END after CASE", self._prev)

    return self._parse_window(
        self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
    )

def _parse_if(self) -> t.Optional[exp.Expression]:
    # Both IF(cond, true[, false]) and `IF cond THEN ... [ELSE ...] END` forms.
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(self._parse_conjunction)
        this = self.validate_expression(exp.If.from_arg_list(args), args)
        self._match_r_paren()
    else:
        index = self._index - 1
        condition = self._parse_conjunction()

        if not condition:
            self._retreat(index)
            return None

        self._match(TokenType.THEN)
        true = self._parse_conjunction()
        false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
        self._match(TokenType.END)
        this = self.expression(exp.If, this=condition, true=true, false=false)

    return self._parse_window(this)

def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; caller already consumed NEXT.
    if not self._match_text_seq("VALUE", "FOR"):
        self._retreat(self._index - 1)
        return None

    return self.expression(
        exp.NextValueFor,
        this=self._parse_column(),
        order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
    )

def _parse_extract(self) -> exp.Extract:
    # EXTRACT(<part> FROM <expr>) — a comma is tolerated in place of FROM.
    this = self._parse_function() or self._parse_var() or self._parse_type()

    if self._match(TokenType.FROM):
        return self.expression(exp.Extract,
this=this, expression=self._parse_bitwise()) 4330 4331 if not self._match(TokenType.COMMA): 4332 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4333 4334 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4335 4336 def _parse_any_value(self) -> exp.AnyValue: 4337 this = self._parse_lambda() 4338 is_max = None 4339 having = None 4340 4341 if self._match(TokenType.HAVING): 4342 self._match_texts(("MAX", "MIN")) 4343 is_max = self._prev.text == "MAX" 4344 having = self._parse_column() 4345 4346 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4347 4348 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4349 this = self._parse_conjunction() 4350 4351 if not self._match(TokenType.ALIAS): 4352 if self._match(TokenType.COMMA): 4353 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4354 4355 self.raise_error("Expected AS after CAST") 4356 4357 fmt = None 4358 to = self._parse_types() 4359 4360 if self._match(TokenType.FORMAT): 4361 fmt_string = self._parse_string() 4362 fmt = self._parse_at_time_zone(fmt_string) 4363 4364 if not to: 4365 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4366 if to.this in exp.DataType.TEMPORAL_TYPES: 4367 this = self.expression( 4368 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4369 this=this, 4370 format=exp.Literal.string( 4371 format_time( 4372 fmt_string.this if fmt_string else "", 4373 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4374 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4375 ) 4376 ), 4377 ) 4378 4379 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4380 this.set("zone", fmt.args["zone"]) 4381 return this 4382 elif not to: 4383 self.raise_error("Expected TYPE after CAST") 4384 elif isinstance(to, exp.Identifier): 4385 to = exp.DataType.build(to.name, udt=True) 4386 elif to.this == exp.DataType.Type.CHAR: 4387 if 
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregate arguments.

        Handles the Postgres and BigQuery STRING_AGG variants as well as the
        `WITHIN GROUP (ORDER BY ...)` form, normalizing everything into an
        exp.GroupConcat node so it can be transpiled across dialects.
        """
        if self._match(TokenType.DISTINCT):
            # STRING_AGG(DISTINCT expr [, separator ...])
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # More tokens remain inside the call's parentheses, so fold any
            # trailing ORDER BY / LIMIT into the last argument.
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
4448 """ 4449 args = self._parse_csv(self._parse_conjunction) 4450 4451 if len(args) < 3: 4452 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4453 4454 expression, *expressions = args 4455 if not expression: 4456 return None 4457 4458 ifs = [] 4459 for search, result in zip(expressions[::2], expressions[1::2]): 4460 if not search or not result: 4461 return None 4462 4463 if isinstance(search, exp.Literal): 4464 ifs.append( 4465 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4466 ) 4467 elif isinstance(search, exp.Null): 4468 ifs.append( 4469 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4470 ) 4471 else: 4472 cond = exp.or_( 4473 exp.EQ(this=expression.copy(), expression=search), 4474 exp.and_( 4475 exp.Is(this=expression.copy(), expression=exp.Null()), 4476 exp.Is(this=search.copy(), expression=exp.Null()), 4477 copy=False, 4478 ), 4479 copy=False, 4480 ) 4481 ifs.append(exp.If(this=cond, true=result)) 4482 4483 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4484 4485 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4486 self._match_text_seq("KEY") 4487 key = self._parse_column() 4488 self._match_set((TokenType.COLON, TokenType.COMMA)) 4489 self._match_text_seq("VALUE") 4490 value = self._parse_bitwise() 4491 4492 if not key and not value: 4493 return None 4494 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4495 4496 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4497 if not this or not self._match_text_seq("FORMAT", "JSON"): 4498 return this 4499 4500 return self.expression(exp.FormatJson, this=this) 4501 4502 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4503 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4504 for value in values: 4505 if self._match_text_seq(value, "ON", on): 4506 return f"{value} ON {on}" 4507 4508 return None 4509 4510 def _parse_json_object(self) -> exp.JSONObject: 4511 star = self._parse_star() 4512 expressions = ( 4513 [star] 4514 if star 4515 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4516 ) 4517 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4518 4519 unique_keys = None 4520 if self._match_text_seq("WITH", "UNIQUE"): 4521 unique_keys = True 4522 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4523 unique_keys = False 4524 4525 self._match_text_seq("KEYS") 4526 4527 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4528 self._parse_type() 4529 ) 4530 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4531 4532 return self.expression( 4533 exp.JSONObject, 4534 expressions=expressions, 4535 null_handling=null_handling, 4536 unique_keys=unique_keys, 4537 return_type=return_type, 4538 encoding=encoding, 4539 ) 4540 4541 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4542 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4543 if not self._match_text_seq("NESTED"): 4544 this = self._parse_id_var() 4545 kind = self._parse_types(allow_identifiers=False) 4546 nested = None 4547 else: 4548 this = None 4549 kind = None 4550 nested = True 4551 4552 path = self._match_text_seq("PATH") and self._parse_string() 4553 nested_schema = nested and self._parse_json_schema() 4554 4555 return self.expression( 4556 exp.JSONColumnDef, 4557 this=this, 4558 kind=kind, 4559 path=path, 4560 nested_schema=nested_schema, 4561 ) 4562 4563 def _parse_json_schema(self) -> exp.JSONSchema: 4564 self._match_text_seq("COLUMNS") 4565 return self.expression( 4566 exp.JSONSchema, 4567 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4568 ) 4569 4570 def _parse_json_table(self) -> 
exp.JSONTable: 4571 this = self._parse_format_json(self._parse_bitwise()) 4572 path = self._match(TokenType.COMMA) and self._parse_string() 4573 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4574 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4575 schema = self._parse_json_schema() 4576 4577 return exp.JSONTable( 4578 this=this, 4579 schema=schema, 4580 path=path, 4581 error_handling=error_handling, 4582 empty_handling=empty_handling, 4583 ) 4584 4585 def _parse_match_against(self) -> exp.MatchAgainst: 4586 expressions = self._parse_csv(self._parse_column) 4587 4588 self._match_text_seq(")", "AGAINST", "(") 4589 4590 this = self._parse_string() 4591 4592 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4593 modifier = "IN NATURAL LANGUAGE MODE" 4594 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4595 modifier = f"{modifier} WITH QUERY EXPANSION" 4596 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4597 modifier = "IN BOOLEAN MODE" 4598 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4599 modifier = "WITH QUERY EXPANSION" 4600 else: 4601 modifier = None 4602 4603 return self.expression( 4604 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4605 ) 4606 4607 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4608 def _parse_open_json(self) -> exp.OpenJSON: 4609 this = self._parse_bitwise() 4610 path = self._match(TokenType.COMMA) and self._parse_string() 4611 4612 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4613 this = self._parse_field(any_token=True) 4614 kind = self._parse_types() 4615 path = self._parse_string() 4616 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4617 4618 return self.expression( 4619 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4620 ) 4621 4622 expressions = None 4623 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4624 
self._match_l_paren() 4625 expressions = self._parse_csv(_parse_open_json_column_def) 4626 4627 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4628 4629 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4630 args = self._parse_csv(self._parse_bitwise) 4631 4632 if self._match(TokenType.IN): 4633 return self.expression( 4634 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4635 ) 4636 4637 if haystack_first: 4638 haystack = seq_get(args, 0) 4639 needle = seq_get(args, 1) 4640 else: 4641 needle = seq_get(args, 0) 4642 haystack = seq_get(args, 1) 4643 4644 return self.expression( 4645 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4646 ) 4647 4648 def _parse_predict(self) -> exp.Predict: 4649 self._match_text_seq("MODEL") 4650 this = self._parse_table() 4651 4652 self._match(TokenType.COMMA) 4653 self._match_text_seq("TABLE") 4654 4655 return self.expression( 4656 exp.Predict, 4657 this=this, 4658 expression=self._parse_table(), 4659 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4660 ) 4661 4662 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4663 args = self._parse_csv(self._parse_table) 4664 return exp.JoinHint(this=func_name.upper(), expressions=args) 4665 4666 def _parse_substring(self) -> exp.Substring: 4667 # Postgres supports the form: substring(string [from int] [for int]) 4668 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4669 4670 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4671 4672 if self._match(TokenType.FROM): 4673 args.append(self._parse_bitwise()) 4674 if self._match(TokenType.FOR): 4675 args.append(self._parse_bitwise()) 4676 4677 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4678 4679 def _parse_trim(self) -> exp.Trim: 4680 # https://www.w3resource.com/sql/character-functions/trim.php 4681 # 
https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4682 4683 position = None 4684 collation = None 4685 expression = None 4686 4687 if self._match_texts(self.TRIM_TYPES): 4688 position = self._prev.text.upper() 4689 4690 this = self._parse_bitwise() 4691 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4692 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4693 expression = self._parse_bitwise() 4694 4695 if invert_order: 4696 this, expression = expression, this 4697 4698 if self._match(TokenType.COLLATE): 4699 collation = self._parse_bitwise() 4700 4701 return self.expression( 4702 exp.Trim, this=this, position=position, expression=expression, collation=collation 4703 ) 4704 4705 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4706 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4707 4708 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4709 return self._parse_window(self._parse_id_var(), alias=True) 4710 4711 def _parse_respect_or_ignore_nulls( 4712 self, this: t.Optional[exp.Expression] 4713 ) -> t.Optional[exp.Expression]: 4714 if self._match_text_seq("IGNORE", "NULLS"): 4715 return self.expression(exp.IgnoreNulls, this=this) 4716 if self._match_text_seq("RESPECT", "NULLS"): 4717 return self.expression(exp.RespectNulls, this=this) 4718 return this 4719 4720 def _parse_window( 4721 self, this: t.Optional[exp.Expression], alias: bool = False 4722 ) -> t.Optional[exp.Expression]: 4723 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4724 self._match(TokenType.WHERE) 4725 this = self.expression( 4726 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4727 ) 4728 self._match_r_paren() 4729 4730 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4731 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4732 if self._match_text_seq("WITHIN", "GROUP"): 4733 order = self._parse_wrapped(self._parse_order) 4734 this = self.expression(exp.WithinGroup, this=this, expression=order) 4735 4736 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4737 # Some dialects choose to implement and some do not. 4738 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4739 4740 # There is some code above in _parse_lambda that handles 4741 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4742 4743 # The below changes handle 4744 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4745 4746 # Oracle allows both formats 4747 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4748 # and Snowflake chose to do the same for familiarity 4749 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4750 this = self._parse_respect_or_ignore_nulls(this) 4751 4752 # bigquery select from window x AS (partition by ...) 
4753 if alias: 4754 over = None 4755 self._match(TokenType.ALIAS) 4756 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4757 return this 4758 else: 4759 over = self._prev.text.upper() 4760 4761 if not self._match(TokenType.L_PAREN): 4762 return self.expression( 4763 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4764 ) 4765 4766 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4767 4768 first = self._match(TokenType.FIRST) 4769 if self._match_text_seq("LAST"): 4770 first = False 4771 4772 partition, order = self._parse_partition_and_order() 4773 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4774 4775 if kind: 4776 self._match(TokenType.BETWEEN) 4777 start = self._parse_window_spec() 4778 self._match(TokenType.AND) 4779 end = self._parse_window_spec() 4780 4781 spec = self.expression( 4782 exp.WindowSpec, 4783 kind=kind, 4784 start=start["value"], 4785 start_side=start["side"], 4786 end=end["value"], 4787 end_side=end["side"], 4788 ) 4789 else: 4790 spec = None 4791 4792 self._match_r_paren() 4793 4794 window = self.expression( 4795 exp.Window, 4796 this=this, 4797 partition_by=partition, 4798 order=order, 4799 spec=spec, 4800 alias=window_alias, 4801 over=over, 4802 first=first, 4803 ) 4804 4805 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4806 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4807 return self._parse_window(window, alias=alias) 4808 4809 return window 4810 4811 def _parse_partition_and_order( 4812 self, 4813 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4814 return self._parse_partition_by(), self._parse_order() 4815 4816 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4817 self._match(TokenType.BETWEEN) 4818 4819 return { 4820 "value": ( 4821 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4822 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4823 or self._parse_bitwise() 4824 ), 4825 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4826 } 4827 4828 def _parse_alias( 4829 self, this: t.Optional[exp.Expression], explicit: bool = False 4830 ) -> t.Optional[exp.Expression]: 4831 any_token = self._match(TokenType.ALIAS) 4832 comments = self._prev_comments 4833 4834 if explicit and not any_token: 4835 return this 4836 4837 if self._match(TokenType.L_PAREN): 4838 aliases = self.expression( 4839 exp.Aliases, 4840 comments=comments, 4841 this=this, 4842 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4843 ) 4844 self._match_r_paren(aliases) 4845 return aliases 4846 4847 alias = self._parse_id_var(any_token) 4848 4849 if alias: 4850 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 4851 4852 return this 4853 4854 def _parse_id_var( 4855 self, 4856 any_token: bool = True, 4857 tokens: t.Optional[t.Collection[TokenType]] = None, 4858 ) -> t.Optional[exp.Expression]: 4859 identifier = self._parse_identifier() 4860 4861 if identifier: 4862 return identifier 4863 4864 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4865 quoted = self._prev.token_type == TokenType.STRING 4866 return exp.Identifier(this=self._prev.text, quoted=quoted) 4867 4868 return None 4869 4870 def _parse_string(self) -> 
t.Optional[exp.Expression]: 4871 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 4872 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 4873 return self._parse_placeholder() 4874 4875 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4876 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4877 4878 def _parse_number(self) -> t.Optional[exp.Expression]: 4879 if self._match(TokenType.NUMBER): 4880 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4881 return self._parse_placeholder() 4882 4883 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4884 if self._match(TokenType.IDENTIFIER): 4885 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4886 return self._parse_placeholder() 4887 4888 def _parse_var( 4889 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4890 ) -> t.Optional[exp.Expression]: 4891 if ( 4892 (any_token and self._advance_any()) 4893 or self._match(TokenType.VAR) 4894 or (self._match_set(tokens) if tokens else False) 4895 ): 4896 return self.expression(exp.Var, this=self._prev.text) 4897 return self._parse_placeholder() 4898 4899 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 4900 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 4901 self._advance() 4902 return self._prev 4903 return None 4904 4905 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4906 return self._parse_var() or self._parse_string() 4907 4908 def _parse_null(self) -> t.Optional[exp.Expression]: 4909 if self._match_set(self.NULL_TOKENS): 4910 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4911 return self._parse_placeholder() 4912 4913 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4914 if self._match(TokenType.TRUE): 4915 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4916 if 
self._match(TokenType.FALSE): 4917 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4918 return self._parse_placeholder() 4919 4920 def _parse_star(self) -> t.Optional[exp.Expression]: 4921 if self._match(TokenType.STAR): 4922 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4923 return self._parse_placeholder() 4924 4925 def _parse_parameter(self) -> exp.Parameter: 4926 def _parse_parameter_part() -> t.Optional[exp.Expression]: 4927 return ( 4928 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 4929 ) 4930 4931 self._match(TokenType.L_BRACE) 4932 this = _parse_parameter_part() 4933 expression = self._match(TokenType.COLON) and _parse_parameter_part() 4934 self._match(TokenType.R_BRACE) 4935 4936 return self.expression(exp.Parameter, this=this, expression=expression) 4937 4938 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4939 if self._match_set(self.PLACEHOLDER_PARSERS): 4940 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4941 if placeholder: 4942 return placeholder 4943 self._advance(-1) 4944 return None 4945 4946 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4947 if not self._match(TokenType.EXCEPT): 4948 return None 4949 if self._match(TokenType.L_PAREN, advance=False): 4950 return self._parse_wrapped_csv(self._parse_column) 4951 4952 except_column = self._parse_column() 4953 return [except_column] if except_column else None 4954 4955 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4956 if not self._match(TokenType.REPLACE): 4957 return None 4958 if self._match(TokenType.L_PAREN, advance=False): 4959 return self._parse_wrapped_csv(self._parse_expression) 4960 4961 replace_expression = self._parse_expression() 4962 return [replace_expression] if replace_expression else None 4963 4964 def _parse_csv( 4965 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4966 ) -> t.List[exp.Expression]: 4967 parse_result = 
parse_method() 4968 items = [parse_result] if parse_result is not None else [] 4969 4970 while self._match(sep): 4971 self._add_comments(parse_result) 4972 parse_result = parse_method() 4973 if parse_result is not None: 4974 items.append(parse_result) 4975 4976 return items 4977 4978 def _parse_tokens( 4979 self, parse_method: t.Callable, expressions: t.Dict 4980 ) -> t.Optional[exp.Expression]: 4981 this = parse_method() 4982 4983 while self._match_set(expressions): 4984 this = self.expression( 4985 expressions[self._prev.token_type], 4986 this=this, 4987 comments=self._prev_comments, 4988 expression=parse_method(), 4989 ) 4990 4991 return this 4992 4993 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4994 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4995 4996 def _parse_wrapped_csv( 4997 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4998 ) -> t.List[exp.Expression]: 4999 return self._parse_wrapped( 5000 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5001 ) 5002 5003 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5004 wrapped = self._match(TokenType.L_PAREN) 5005 if not wrapped and not optional: 5006 self.raise_error("Expecting (") 5007 parse_result = parse_method() 5008 if wrapped: 5009 self._match_r_paren() 5010 return parse_result 5011 5012 def _parse_expressions(self) -> t.List[exp.Expression]: 5013 return self._parse_csv(self._parse_expression) 5014 5015 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5016 return self._parse_select() or self._parse_set_operations( 5017 self._parse_expression() if alias else self._parse_conjunction() 5018 ) 5019 5020 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5021 return self._parse_query_modifiers( 5022 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5023 ) 5024 5025 def 
_parse_transaction(self) -> exp.Transaction | exp.Command: 5026 this = None 5027 if self._match_texts(self.TRANSACTION_KIND): 5028 this = self._prev.text 5029 5030 self._match_texts(("TRANSACTION", "WORK")) 5031 5032 modes = [] 5033 while True: 5034 mode = [] 5035 while self._match(TokenType.VAR): 5036 mode.append(self._prev.text) 5037 5038 if mode: 5039 modes.append(" ".join(mode)) 5040 if not self._match(TokenType.COMMA): 5041 break 5042 5043 return self.expression(exp.Transaction, this=this, modes=modes) 5044 5045 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5046 chain = None 5047 savepoint = None 5048 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5049 5050 self._match_texts(("TRANSACTION", "WORK")) 5051 5052 if self._match_text_seq("TO"): 5053 self._match_text_seq("SAVEPOINT") 5054 savepoint = self._parse_id_var() 5055 5056 if self._match(TokenType.AND): 5057 chain = not self._match_text_seq("NO") 5058 self._match_text_seq("CHAIN") 5059 5060 if is_rollback: 5061 return self.expression(exp.Rollback, savepoint=savepoint) 5062 5063 return self.expression(exp.Commit, chain=chain) 5064 5065 def _parse_refresh(self) -> exp.Refresh: 5066 self._match(TokenType.TABLE) 5067 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5068 5069 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5070 if not self._match_text_seq("ADD"): 5071 return None 5072 5073 self._match(TokenType.COLUMN) 5074 exists_column = self._parse_exists(not_=True) 5075 expression = self._parse_field_def() 5076 5077 if expression: 5078 expression.set("exists", exists_column) 5079 5080 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5081 if self._match_texts(("FIRST", "AFTER")): 5082 position = self._prev.text 5083 column_position = self.expression( 5084 exp.ColumnPosition, this=self._parse_column(), position=position 5085 ) 5086 expression.set("position", column_position) 5087 5088 
return expression 5089 5090 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5091 drop = self._match(TokenType.DROP) and self._parse_drop() 5092 if drop and not isinstance(drop, exp.Command): 5093 drop.set("kind", drop.args.get("kind", "COLUMN")) 5094 return drop 5095 5096 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5097 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5098 return self.expression( 5099 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5100 ) 5101 5102 def _parse_add_constraint(self) -> exp.AddConstraint: 5103 this = None 5104 kind = self._prev.token_type 5105 5106 if kind == TokenType.CONSTRAINT: 5107 this = self._parse_id_var() 5108 5109 if self._match_text_seq("CHECK"): 5110 expression = self._parse_wrapped(self._parse_conjunction) 5111 enforced = self._match_text_seq("ENFORCED") 5112 5113 return self.expression( 5114 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5115 ) 5116 5117 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5118 expression = self._parse_foreign_key() 5119 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5120 expression = self._parse_primary_key() 5121 else: 5122 expression = None 5123 5124 return self.expression(exp.AddConstraint, this=this, expression=expression) 5125 5126 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5127 index = self._index - 1 5128 5129 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5130 return self._parse_csv(self._parse_add_constraint) 5131 5132 self._retreat(index) 5133 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5134 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5135 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5136 5137 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5138 
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement.

        Only ALTER TABLE is parsed structurally; anything else — and any
        ALTER TABLE whose action parser leaves tokens unconsumed — falls
        back to an opaque exp.Command via _parse_as_command.
        """
        start = self._prev  # the ALTER token, kept for the Command fallback

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # After the advance, _prev is the action keyword (ADD, DROP, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable when every token was consumed;
            # otherwise re-emit the whole statement as a raw command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)
    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN
        clauses of a MERGE statement, one exp.When per clause."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, and False
            # when neither is present (the _match_text_seq returns falsy).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (...)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = ..., b = ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE action is kept as a bare Var holding the keyword text.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return whens
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5271 if parser: 5272 return parser(self) 5273 return self._parse_as_command(self._prev) 5274 5275 def _parse_set_item_assignment( 5276 self, kind: t.Optional[str] = None 5277 ) -> t.Optional[exp.Expression]: 5278 index = self._index 5279 5280 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5281 return self._parse_set_transaction(global_=kind == "GLOBAL") 5282 5283 left = self._parse_primary() or self._parse_id_var() 5284 assignment_delimiter = self._match_texts(("=", "TO")) 5285 5286 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5287 self._retreat(index) 5288 return None 5289 5290 right = self._parse_statement() or self._parse_id_var() 5291 this = self.expression(exp.EQ, this=left, expression=right) 5292 5293 return self.expression(exp.SetItem, this=this, kind=kind) 5294 5295 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5296 self._match_text_seq("TRANSACTION") 5297 characteristics = self._parse_csv( 5298 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5299 ) 5300 return self.expression( 5301 exp.SetItem, 5302 expressions=characteristics, 5303 kind="TRANSACTION", 5304 **{"global": global_}, # type: ignore 5305 ) 5306 5307 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5308 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5309 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5310 5311 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5312 index = self._index 5313 set_ = self.expression( 5314 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5315 ) 5316 5317 if self._curr: 5318 self._retreat(index) 5319 return self._parse_as_command(self._prev) 5320 5321 return set_ 5322 5323 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5324 for option in 
options: 5325 if self._match_text_seq(*option.split(" ")): 5326 return exp.var(option) 5327 return None 5328 5329 def _parse_as_command(self, start: Token) -> exp.Command: 5330 while self._curr: 5331 self._advance() 5332 text = self._find_sql(start, self._prev) 5333 size = len(start.text) 5334 return exp.Command(this=text[:size], expression=text[size:]) 5335 5336 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5337 settings = [] 5338 5339 self._match_l_paren() 5340 kind = self._parse_id_var() 5341 5342 if self._match(TokenType.L_PAREN): 5343 while True: 5344 key = self._parse_id_var() 5345 value = self._parse_primary() 5346 5347 if not key and value is None: 5348 break 5349 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5350 self._match(TokenType.R_PAREN) 5351 5352 self._match_r_paren() 5353 5354 return self.expression( 5355 exp.DictProperty, 5356 this=this, 5357 kind=kind.this if kind else None, 5358 settings=settings, 5359 ) 5360 5361 def _parse_dict_range(self, this: str) -> exp.DictRange: 5362 self._match_l_paren() 5363 has_min = self._match_text_seq("MIN") 5364 if has_min: 5365 min = self._parse_var() or self._parse_primary() 5366 self._match_text_seq("MAX") 5367 max = self._parse_var() or self._parse_primary() 5368 else: 5369 max = self._parse_var() or self._parse_primary() 5370 min = exp.Literal.number(0) 5371 self._match_r_paren() 5372 return self.expression(exp.DictRange, this=this, min=min, max=max) 5373 5374 def _parse_comprehension( 5375 self, this: t.Optional[exp.Expression] 5376 ) -> t.Optional[exp.Comprehension]: 5377 index = self._index 5378 expression = self._parse_column() 5379 if not self._match(TokenType.IN): 5380 self._retreat(index - 1) 5381 return None 5382 iterator = self._parse_column() 5383 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5384 return self.expression( 5385 exp.Comprehension, 5386 this=this, 5387 expression=expression, 5388 iterator=iterator, 5389 
condition=condition, 5390 ) 5391 5392 def _find_parser( 5393 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5394 ) -> t.Optional[t.Callable]: 5395 if not self._curr: 5396 return None 5397 5398 index = self._index 5399 this = [] 5400 while True: 5401 # The current token might be multiple words 5402 curr = self._curr.text.upper() 5403 key = curr.split(" ") 5404 this.append(curr) 5405 5406 self._advance() 5407 result, trie = in_trie(trie, key) 5408 if result == TrieResult.FAILED: 5409 break 5410 5411 if result == TrieResult.EXISTS: 5412 subparser = parsers[" ".join(this)] 5413 return subparser 5414 5415 self._retreat(index) 5416 return None 5417 5418 def _match(self, token_type, advance=True, expression=None): 5419 if not self._curr: 5420 return None 5421 5422 if self._curr.token_type == token_type: 5423 if advance: 5424 self._advance() 5425 self._add_comments(expression) 5426 return True 5427 5428 return None 5429 5430 def _match_set(self, types, advance=True): 5431 if not self._curr: 5432 return None 5433 5434 if self._curr.token_type in types: 5435 if advance: 5436 self._advance() 5437 return True 5438 5439 return None 5440 5441 def _match_pair(self, token_type_a, token_type_b, advance=True): 5442 if not self._curr or not self._next: 5443 return None 5444 5445 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5446 if advance: 5447 self._advance(2) 5448 return True 5449 5450 return None 5451 5452 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5453 if not self._match(TokenType.L_PAREN, expression=expression): 5454 self.raise_error("Expecting (") 5455 5456 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5457 if not self._match(TokenType.R_PAREN, expression=expression): 5458 self.raise_error("Expecting )") 5459 5460 def _match_texts(self, texts, advance=True): 5461 if self._curr and self._curr.text.upper() in texts: 5462 if advance: 5463 self._advance() 5464 
return True 5465 return False 5466 5467 def _match_text_seq(self, *texts, advance=True): 5468 index = self._index 5469 for text in texts: 5470 if self._curr and self._curr.text.upper() == text: 5471 self._advance() 5472 else: 5473 self._retreat(index) 5474 return False 5475 5476 if not advance: 5477 self._retreat(index) 5478 5479 return True 5480 5481 @t.overload 5482 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5483 ... 5484 5485 @t.overload 5486 def _replace_columns_with_dots( 5487 self, this: t.Optional[exp.Expression] 5488 ) -> t.Optional[exp.Expression]: 5489 ... 5490 5491 def _replace_columns_with_dots(self, this): 5492 if isinstance(this, exp.Dot): 5493 exp.replace_children(this, self._replace_columns_with_dots) 5494 elif isinstance(this, exp.Column): 5495 exp.replace_children(this, self._replace_columns_with_dots) 5496 table = this.args.get("table") 5497 this = ( 5498 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5499 ) 5500 5501 return this 5502 5503 def _replace_lambda( 5504 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5505 ) -> t.Optional[exp.Expression]: 5506 if not node: 5507 return node 5508 5509 for column in node.find_all(exp.Column): 5510 if column.parts[0].name in lambda_variables: 5511 dot_or_id = column.to_dot() if column.table else column.this 5512 parent = column.parent 5513 5514 while isinstance(parent, exp.Dot): 5515 if not isinstance(parent.parent, exp.Dot): 5516 parent.replace(dot_or_id) 5517 break 5518 parent = parent.parent 5519 else: 5520 if column is node: 5521 node = dot_or_id 5522 else: 5523 column.replace(dot_or_id) 5524 return node
The Parser consumes a list of tokens produced by the Tokenizer and produces parsed syntax trees, one tree per SQL statement.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
974 def __init__( 975 self, 976 error_level: t.Optional[ErrorLevel] = None, 977 error_message_context: int = 100, 978 max_errors: int = 3, 979 dialect: DialectType = None, 980 ): 981 from sqlglot.dialects import Dialect 982 983 self.error_level = error_level or ErrorLevel.IMMEDIATE 984 self.error_message_context = error_message_context 985 self.max_errors = max_errors 986 self.dialect = Dialect.get_or_raise(dialect) 987 self.reset()
999 def parse( 1000 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1001 ) -> t.List[t.Optional[exp.Expression]]: 1002 """ 1003 Parses a list of tokens and returns a list of syntax trees, one tree 1004 per parsed SQL statement. 1005 1006 Args: 1007 raw_tokens: The list of tokens. 1008 sql: The original SQL string, used to produce helpful debug messages. 1009 1010 Returns: 1011 The list of the produced syntax trees. 1012 """ 1013 return self._parse( 1014 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1015 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1017 def parse_into( 1018 self, 1019 expression_types: exp.IntoType, 1020 raw_tokens: t.List[Token], 1021 sql: t.Optional[str] = None, 1022 ) -> t.List[t.Optional[exp.Expression]]: 1023 """ 1024 Parses a list of tokens into a given Expression type. If a collection of Expression 1025 types is given instead, this method will try to parse the token list into each one 1026 of them, stopping at the first for which the parsing succeeds. 1027 1028 Args: 1029 expression_types: The expression type(s) to try and parse the token list into. 1030 raw_tokens: The list of tokens. 1031 sql: The original SQL string, used to produce helpful debug messages. 1032 1033 Returns: 1034 The target Expression. 1035 """ 1036 errors = [] 1037 for expression_type in ensure_list(expression_types): 1038 parser = self.EXPRESSION_PARSERS.get(expression_type) 1039 if not parser: 1040 raise TypeError(f"No parser registered for {expression_type}") 1041 1042 try: 1043 return self._parse(parser, raw_tokens, sql) 1044 except ParseError as e: 1045 e.errors[0]["into_expression"] = expression_type 1046 errors.append(e) 1047 1048 raise ParseError( 1049 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1050 errors=merge_errors(errors), 1051 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1088 def check_errors(self) -> None: 1089 """Logs or raises any found errors, depending on the chosen error level setting.""" 1090 if self.error_level == ErrorLevel.WARN: 1091 for error in self.errors: 1092 logger.error(str(error)) 1093 elif self.error_level == ErrorLevel.RAISE and self.errors: 1094 raise ParseError( 1095 concat_messages(self.errors, self.max_errors), 1096 errors=merge_errors(self.errors), 1097 )
Logs or raises any found errors, depending on the chosen error level setting.
1099 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1100 """ 1101 Appends an error in the list of recorded errors or raises it, depending on the chosen 1102 error level setting. 1103 """ 1104 token = token or self._curr or self._prev or Token.string("") 1105 start = token.start 1106 end = token.end + 1 1107 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1108 highlight = self.sql[start:end] 1109 end_context = self.sql[end : end + self.error_message_context] 1110 1111 error = ParseError.new( 1112 f"{message}. Line {token.line}, Col: {token.col}.\n" 1113 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1114 description=message, 1115 line=token.line, 1116 col=token.col, 1117 start_context=start_context, 1118 highlight=highlight, 1119 end_context=end_context, 1120 ) 1121 1122 if self.error_level == ErrorLevel.IMMEDIATE: 1123 raise error 1124 1125 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1127 def expression( 1128 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1129 ) -> E: 1130 """ 1131 Creates a new, validated Expression. 1132 1133 Args: 1134 exp_class: The expression class to instantiate. 1135 comments: An optional list of comments to attach to the expression. 1136 kwargs: The arguments to set for the expression along with their respective values. 1137 1138 Returns: 1139 The target expression. 1140 """ 1141 instance = exp_class(**kwargs) 1142 instance.add_comments(comments) if comments else self._add_comments(instance) 1143 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1150 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1151 """ 1152 Validates an Expression, making sure that all its mandatory arguments are set. 1153 1154 Args: 1155 expression: The expression to validate. 1156 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1157 1158 Returns: 1159 The validated expression. 1160 """ 1161 if self.error_level != ErrorLevel.IGNORE: 1162 for error_message in expression.error_messages(args): 1163 self.raise_error(error_message) 1164 1165 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.