# sqlglot/parser.py
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap (or StarMap) expression from a flat argument list.

    Args:
        args: Alternating key/value expressions, e.g. ``[k1, v1, k2, v2, ...]``.
            A single star argument (``MAP(*)``) produces a ``StarMap`` instead.

    Returns:
        A ``StarMap`` for the star case, otherwise a ``VarMap`` whose keys and
        values are collected into two parallel ``Array`` expressions.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value, key, value, ...
    # NOTE(review): an odd-length list raises IndexError here — presumably the
    # tokenizer/parser guarantees pairs; confirm before hardening.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression, wrapping it in an Escape node when an
    escape character argument is present.

    The argument order is reversed relative to the node: ``args[0]`` is the
    pattern (becomes ``expression``) and ``args[1]`` is the value being
    matched (becomes ``this``). ``args[2]``, if given, is the escape char.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range-parser callback that parses the right-hand side as a
    bitwise expression, wraps both sides in ``expr_type``, and then applies
    an optional trailing ESCAPE clause.
    """
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass for Parser subclasses.

    Precomputes the SHOW/SET keyword tries from each subclass's
    ``SHOW_PARSERS`` / ``SET_PARSERS`` tables so multi-word keyword lookup
    (e.g. ``SET GLOBAL ...``) is done via trie traversal at parse time.
    """

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Multi-word keys ("ISOLATION LEVEL ...") are split into word paths.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.UDECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 
TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 TokenType.DECIMAL: TokenType.UDECIMAL, 220 } 221 222 SUBQUERY_PREDICATES = { 223 TokenType.ANY: exp.Any, 224 TokenType.ALL: exp.All, 225 TokenType.EXISTS: exp.Exists, 226 TokenType.SOME: exp.Any, 227 } 228 229 RESERVED_KEYWORDS = { 230 *Tokenizer.SINGLE_TOKENS.values(), 231 TokenType.SELECT, 232 } 233 234 DB_CREATABLES = { 235 TokenType.DATABASE, 236 TokenType.SCHEMA, 237 TokenType.TABLE, 238 TokenType.VIEW, 239 TokenType.MODEL, 240 TokenType.DICTIONARY, 241 } 242 243 CREATABLES = { 244 TokenType.COLUMN, 245 TokenType.FUNCTION, 246 TokenType.INDEX, 247 TokenType.PROCEDURE, 248 *DB_CREATABLES, 249 } 250 251 # Tokens that can represent identifiers 252 ID_VAR_TOKENS = { 253 TokenType.VAR, 254 TokenType.ANTI, 255 TokenType.APPLY, 256 TokenType.ASC, 257 TokenType.AUTO_INCREMENT, 258 TokenType.BEGIN, 259 TokenType.CACHE, 260 TokenType.CASE, 261 TokenType.COLLATE, 262 TokenType.COMMAND, 263 TokenType.COMMENT, 264 TokenType.COMMIT, 265 TokenType.CONSTRAINT, 266 TokenType.DEFAULT, 267 TokenType.DELETE, 268 TokenType.DESC, 269 TokenType.DESCRIBE, 270 TokenType.DICTIONARY, 271 TokenType.DIV, 272 TokenType.END, 273 TokenType.EXECUTE, 274 TokenType.ESCAPE, 275 TokenType.FALSE, 276 TokenType.FIRST, 277 TokenType.FILTER, 278 TokenType.FORMAT, 279 TokenType.FULL, 280 TokenType.IS, 281 TokenType.ISNULL, 282 TokenType.INTERVAL, 283 TokenType.KEEP, 284 TokenType.KILL, 285 TokenType.LEFT, 286 TokenType.LOAD, 287 TokenType.MERGE, 288 TokenType.NATURAL, 289 TokenType.NEXT, 290 TokenType.OFFSET, 291 TokenType.ORDINALITY, 292 TokenType.OVERLAPS, 293 TokenType.OVERWRITE, 294 TokenType.PARTITION, 295 TokenType.PERCENT, 296 TokenType.PIVOT, 297 TokenType.PRAGMA, 298 TokenType.RANGE, 299 TokenType.REFERENCES, 300 TokenType.RIGHT, 301 TokenType.ROW, 302 TokenType.ROWS, 303 TokenType.SEMI, 304 TokenType.SET, 305 TokenType.SETTINGS, 306 TokenType.SHOW, 307 TokenType.TEMPORARY, 308 TokenType.TOP, 309 
TokenType.TRUE, 310 TokenType.UNIQUE, 311 TokenType.UNPIVOT, 312 TokenType.UPDATE, 313 TokenType.VOLATILE, 314 TokenType.WINDOW, 315 *CREATABLES, 316 *SUBQUERY_PREDICATES, 317 *TYPE_TOKENS, 318 *NO_PAREN_FUNCTIONS, 319 } 320 321 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 322 323 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 324 TokenType.ANTI, 325 TokenType.APPLY, 326 TokenType.ASOF, 327 TokenType.FULL, 328 TokenType.LEFT, 329 TokenType.LOCK, 330 TokenType.NATURAL, 331 TokenType.OFFSET, 332 TokenType.RIGHT, 333 TokenType.SEMI, 334 TokenType.WINDOW, 335 } 336 337 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 338 339 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 340 341 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 342 343 FUNC_TOKENS = { 344 TokenType.COLLATE, 345 TokenType.COMMAND, 346 TokenType.CURRENT_DATE, 347 TokenType.CURRENT_DATETIME, 348 TokenType.CURRENT_TIMESTAMP, 349 TokenType.CURRENT_TIME, 350 TokenType.CURRENT_USER, 351 TokenType.FILTER, 352 TokenType.FIRST, 353 TokenType.FORMAT, 354 TokenType.GLOB, 355 TokenType.IDENTIFIER, 356 TokenType.INDEX, 357 TokenType.ISNULL, 358 TokenType.ILIKE, 359 TokenType.INSERT, 360 TokenType.LIKE, 361 TokenType.MERGE, 362 TokenType.OFFSET, 363 TokenType.PRIMARY_KEY, 364 TokenType.RANGE, 365 TokenType.REPLACE, 366 TokenType.RLIKE, 367 TokenType.ROW, 368 TokenType.UNNEST, 369 TokenType.VAR, 370 TokenType.LEFT, 371 TokenType.RIGHT, 372 TokenType.DATE, 373 TokenType.DATETIME, 374 TokenType.TABLE, 375 TokenType.TIMESTAMP, 376 TokenType.TIMESTAMPTZ, 377 TokenType.WINDOW, 378 TokenType.XOR, 379 *TYPE_TOKENS, 380 *SUBQUERY_PREDICATES, 381 } 382 383 CONJUNCTION = { 384 TokenType.AND: exp.And, 385 TokenType.OR: exp.Or, 386 } 387 388 EQUALITY = { 389 TokenType.EQ: exp.EQ, 390 TokenType.NEQ: exp.NEQ, 391 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 392 } 393 394 COMPARISON = { 395 TokenType.GT: exp.GT, 396 TokenType.GTE: exp.GTE, 397 TokenType.LT: exp.LT, 398 TokenType.LTE: exp.LTE, 399 } 400 401 BITWISE = { 
402 TokenType.AMP: exp.BitwiseAnd, 403 TokenType.CARET: exp.BitwiseXor, 404 TokenType.PIPE: exp.BitwiseOr, 405 TokenType.DPIPE: exp.DPipe, 406 } 407 408 TERM = { 409 TokenType.DASH: exp.Sub, 410 TokenType.PLUS: exp.Add, 411 TokenType.MOD: exp.Mod, 412 TokenType.COLLATE: exp.Collate, 413 } 414 415 FACTOR = { 416 TokenType.DIV: exp.IntDiv, 417 TokenType.LR_ARROW: exp.Distance, 418 TokenType.SLASH: exp.Div, 419 TokenType.STAR: exp.Mul, 420 } 421 422 TIMES = { 423 TokenType.TIME, 424 TokenType.TIMETZ, 425 } 426 427 TIMESTAMPS = { 428 TokenType.TIMESTAMP, 429 TokenType.TIMESTAMPTZ, 430 TokenType.TIMESTAMPLTZ, 431 *TIMES, 432 } 433 434 SET_OPERATIONS = { 435 TokenType.UNION, 436 TokenType.INTERSECT, 437 TokenType.EXCEPT, 438 } 439 440 JOIN_METHODS = { 441 TokenType.NATURAL, 442 TokenType.ASOF, 443 } 444 445 JOIN_SIDES = { 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.FULL, 449 } 450 451 JOIN_KINDS = { 452 TokenType.INNER, 453 TokenType.OUTER, 454 TokenType.CROSS, 455 TokenType.SEMI, 456 TokenType.ANTI, 457 } 458 459 JOIN_HINTS: t.Set[str] = set() 460 461 LAMBDAS = { 462 TokenType.ARROW: lambda self, expressions: self.expression( 463 exp.Lambda, 464 this=self._replace_lambda( 465 self._parse_conjunction(), 466 {node.name for node in expressions}, 467 ), 468 expressions=expressions, 469 ), 470 TokenType.FARROW: lambda self, expressions: self.expression( 471 exp.Kwarg, 472 this=exp.var(expressions[0].name), 473 expression=self._parse_conjunction(), 474 ), 475 } 476 477 COLUMN_OPERATORS = { 478 TokenType.DOT: None, 479 TokenType.DCOLON: lambda self, this, to: self.expression( 480 exp.Cast if self.STRICT_CAST else exp.TryCast, 481 this=this, 482 to=to, 483 ), 484 TokenType.ARROW: lambda self, this, path: self.expression( 485 exp.JSONExtract, 486 this=this, 487 expression=path, 488 ), 489 TokenType.DARROW: lambda self, this, path: self.expression( 490 exp.JSONExtractScalar, 491 this=this, 492 expression=path, 493 ), 494 TokenType.HASH_ARROW: lambda self, this, path: 
self.expression( 495 exp.JSONBExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 505 exp.JSONBContains, 506 this=this, 507 expression=key, 508 ), 509 } 510 511 EXPRESSION_PARSERS = { 512 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 513 exp.Column: lambda self: self._parse_column(), 514 exp.Condition: lambda self: self._parse_conjunction(), 515 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 516 exp.Expression: lambda self: self._parse_statement(), 517 exp.From: lambda self: self._parse_from(), 518 exp.Group: lambda self: self._parse_group(), 519 exp.Having: lambda self: self._parse_having(), 520 exp.Identifier: lambda self: self._parse_id_var(), 521 exp.Join: lambda self: self._parse_join(), 522 exp.Lambda: lambda self: self._parse_lambda(), 523 exp.Lateral: lambda self: self._parse_lateral(), 524 exp.Limit: lambda self: self._parse_limit(), 525 exp.Offset: lambda self: self._parse_offset(), 526 exp.Order: lambda self: self._parse_order(), 527 exp.Ordered: lambda self: self._parse_ordered(), 528 exp.Properties: lambda self: self._parse_properties(), 529 exp.Qualify: lambda self: self._parse_qualify(), 530 exp.Returning: lambda self: self._parse_returning(), 531 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 532 exp.Table: lambda self: self._parse_table_parts(), 533 exp.TableAlias: lambda self: self._parse_table_alias(), 534 exp.Where: lambda self: self._parse_where(), 535 exp.Window: lambda self: self._parse_named_window(), 536 exp.With: lambda self: self._parse_with(), 537 "JOIN_TYPE": lambda self: self._parse_join_parts(), 538 } 539 540 STATEMENT_PARSERS = { 541 TokenType.ALTER: lambda self: self._parse_alter(), 542 TokenType.BEGIN: lambda self: self._parse_transaction(), 543 
TokenType.CACHE: lambda self: self._parse_cache(), 544 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 545 TokenType.COMMENT: lambda self: self._parse_comment(), 546 TokenType.CREATE: lambda self: self._parse_create(), 547 TokenType.DELETE: lambda self: self._parse_delete(), 548 TokenType.DESC: lambda self: self._parse_describe(), 549 TokenType.DESCRIBE: lambda self: self._parse_describe(), 550 TokenType.DROP: lambda self: self._parse_drop(), 551 TokenType.INSERT: lambda self: self._parse_insert(), 552 TokenType.KILL: lambda self: self._parse_kill(), 553 TokenType.LOAD: lambda self: self._parse_load(), 554 TokenType.MERGE: lambda self: self._parse_merge(), 555 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 556 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 557 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 558 TokenType.SET: lambda self: self._parse_set(), 559 TokenType.UNCACHE: lambda self: self._parse_uncache(), 560 TokenType.UPDATE: lambda self: self._parse_update(), 561 TokenType.USE: lambda self: self.expression( 562 exp.Use, 563 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 564 and exp.var(self._prev.text), 565 this=self._parse_table(schema=False), 566 ), 567 } 568 569 UNARY_PARSERS = { 570 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 571 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 572 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 573 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 574 } 575 576 PRIMARY_PARSERS = { 577 TokenType.STRING: lambda self, token: self.expression( 578 exp.Literal, this=token.text, is_string=True 579 ), 580 TokenType.NUMBER: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=False 582 ), 583 TokenType.STAR: lambda self, _: self.expression( 
584 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 585 ), 586 TokenType.NULL: lambda self, _: self.expression(exp.Null), 587 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 588 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 589 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 590 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 591 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 592 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 593 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 594 exp.National, this=token.text 595 ), 596 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 597 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 598 exp.RawString, this=token.text 599 ), 600 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 601 } 602 603 PLACEHOLDER_PARSERS = { 604 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 605 TokenType.PARAMETER: lambda self: self._parse_parameter(), 606 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 607 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 608 else None, 609 } 610 611 RANGE_PARSERS = { 612 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 613 TokenType.GLOB: binary_range_parser(exp.Glob), 614 TokenType.ILIKE: binary_range_parser(exp.ILike), 615 TokenType.IN: lambda self, this: self._parse_in(this), 616 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 617 TokenType.IS: lambda self, this: self._parse_is(this), 618 TokenType.LIKE: binary_range_parser(exp.Like), 619 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 620 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 621 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 622 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 623 } 624 625 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 626 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 627 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 628 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 629 "CHARACTER SET": lambda self: self._parse_character_set(), 630 "CHECKSUM": lambda self: self._parse_checksum(), 631 "CLUSTER BY": lambda self: self._parse_cluster(), 632 "CLUSTERED": lambda self: self._parse_clustered_by(), 633 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 634 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 635 "COPY": lambda self: self._parse_copy_property(), 636 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 637 "DEFINER": lambda self: self._parse_definer(), 638 "DETERMINISTIC": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "DISTKEY": lambda self: self._parse_distkey(), 642 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 643 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 644 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 645 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 646 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 647 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "FREESPACE": lambda self: self._parse_freespace(), 649 "HEAP": lambda self: self.expression(exp.HeapProperty), 650 "IMMUTABLE": lambda self: self.expression( 651 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 652 ), 653 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 654 "JOURNAL": lambda 
self, **kwargs: self._parse_journal(**kwargs), 655 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 656 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 657 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 658 "LIKE": lambda self: self._parse_create_like(), 659 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 660 "LOCK": lambda self: self._parse_locking(), 661 "LOCKING": lambda self: self._parse_locking(), 662 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 663 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 664 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 665 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 666 "NO": lambda self: self._parse_no_property(), 667 "ON": lambda self: self._parse_on_property(), 668 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 669 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 670 "PARTITION BY": lambda self: self._parse_partitioned_by(), 671 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 672 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 673 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 674 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 675 "REMOTE": lambda self: self._parse_remote_with_connection(), 676 "RETURNS": lambda self: self._parse_returns(), 677 "ROW": lambda self: self._parse_row(), 678 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 679 "SAMPLE": lambda self: self.expression( 680 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 681 ), 682 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 683 "SETTINGS": lambda self: self.expression( 684 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 685 ), 686 "SORTKEY": lambda 
self: self._parse_sortkey(), 687 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 688 "STABLE": lambda self: self.expression( 689 exp.StabilityProperty, this=exp.Literal.string("STABLE") 690 ), 691 "STORED": lambda self: self._parse_stored(), 692 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 693 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 694 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 695 "TO": lambda self: self._parse_to_table(), 696 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 697 "TRANSFORM": lambda self: self.expression( 698 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 699 ), 700 "TTL": lambda self: self._parse_ttl(), 701 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 702 "VOLATILE": lambda self: self._parse_volatile_property(), 703 "WITH": lambda self: self._parse_with_property(), 704 } 705 706 CONSTRAINT_PARSERS = { 707 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 708 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 709 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 710 "CHARACTER SET": lambda self: self.expression( 711 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 712 ), 713 "CHECK": lambda self: self.expression( 714 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 715 ), 716 "COLLATE": lambda self: self.expression( 717 exp.CollateColumnConstraint, this=self._parse_var() 718 ), 719 "COMMENT": lambda self: self.expression( 720 exp.CommentColumnConstraint, this=self._parse_string() 721 ), 722 "COMPRESS": lambda self: self._parse_compress(), 723 "CLUSTERED": lambda self: self.expression( 724 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 725 ), 726 "NONCLUSTERED": lambda self: self.expression( 727 
exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 728 ), 729 "DEFAULT": lambda self: self.expression( 730 exp.DefaultColumnConstraint, this=self._parse_bitwise() 731 ), 732 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 733 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 734 "FORMAT": lambda self: self.expression( 735 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 736 ), 737 "GENERATED": lambda self: self._parse_generated_as_identity(), 738 "IDENTITY": lambda self: self._parse_auto_increment(), 739 "INLINE": lambda self: self._parse_inline(), 740 "LIKE": lambda self: self._parse_create_like(), 741 "NOT": lambda self: self._parse_not_constraint(), 742 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 743 "ON": lambda self: ( 744 self._match(TokenType.UPDATE) 745 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 746 ) 747 or self.expression(exp.OnProperty, this=self._parse_id_var()), 748 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 749 "PRIMARY KEY": lambda self: self._parse_primary_key(), 750 "REFERENCES": lambda self: self._parse_references(match=False), 751 "TITLE": lambda self: self.expression( 752 exp.TitleColumnConstraint, this=self._parse_var_or_string() 753 ), 754 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 755 "UNIQUE": lambda self: self._parse_unique(), 756 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 757 "WITH": lambda self: self.expression( 758 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 759 ), 760 } 761 762 ALTER_PARSERS = { 763 "ADD": lambda self: self._parse_alter_table_add(), 764 "ALTER": lambda self: self._parse_alter_table_alter(), 765 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 766 "DROP": lambda 
self: self._parse_alter_table_drop(), 767 "RENAME": lambda self: self._parse_alter_table_rename(), 768 } 769 770 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 771 772 NO_PAREN_FUNCTION_PARSERS = { 773 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 774 "CASE": lambda self: self._parse_case(), 775 "IF": lambda self: self._parse_if(), 776 "NEXT": lambda self: self._parse_next_value_for(), 777 } 778 779 INVALID_FUNC_NAME_TOKENS = { 780 TokenType.IDENTIFIER, 781 TokenType.STRING, 782 } 783 784 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 785 786 FUNCTION_PARSERS = { 787 "ANY_VALUE": lambda self: self._parse_any_value(), 788 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 789 "CONCAT": lambda self: self._parse_concat(), 790 "CONCAT_WS": lambda self: self._parse_concat_ws(), 791 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 792 "DECODE": lambda self: self._parse_decode(), 793 "EXTRACT": lambda self: self._parse_extract(), 794 "JSON_OBJECT": lambda self: self._parse_json_object(), 795 "JSON_TABLE": lambda self: self._parse_json_table(), 796 "LOG": lambda self: self._parse_logarithm(), 797 "MATCH": lambda self: self._parse_match_against(), 798 "OPENJSON": lambda self: self._parse_open_json(), 799 "POSITION": lambda self: self._parse_position(), 800 "PREDICT": lambda self: self._parse_predict(), 801 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 802 "STRING_AGG": lambda self: self._parse_string_agg(), 803 "SUBSTRING": lambda self: self._parse_substring(), 804 "TRIM": lambda self: self._parse_trim(), 805 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 806 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 807 } 808 809 QUERY_MODIFIER_PARSERS = { 810 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 811 TokenType.WHERE: lambda self: ("where", self._parse_where()), 812 TokenType.GROUP_BY: lambda self: ("group", 
self._parse_group()), 813 TokenType.HAVING: lambda self: ("having", self._parse_having()), 814 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 815 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 816 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 817 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 818 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 819 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 820 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 821 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 822 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 823 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 824 TokenType.CLUSTER_BY: lambda self: ( 825 "cluster", 826 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 827 ), 828 TokenType.DISTRIBUTE_BY: lambda self: ( 829 "distribute", 830 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 831 ), 832 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 833 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 834 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 835 } 836 837 SET_PARSERS = { 838 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 839 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 840 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 841 "TRANSACTION": lambda self: self._parse_set_transaction(), 842 } 843 844 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 845 846 TYPE_LITERAL_PARSERS = { 847 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 848 } 849 850 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 851 852 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 853 854 
PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 855 856 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 857 TRANSACTION_CHARACTERISTICS = { 858 "ISOLATION LEVEL REPEATABLE READ", 859 "ISOLATION LEVEL READ COMMITTED", 860 "ISOLATION LEVEL READ UNCOMMITTED", 861 "ISOLATION LEVEL SERIALIZABLE", 862 "READ WRITE", 863 "READ ONLY", 864 } 865 866 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 867 868 CLONE_KEYWORDS = {"CLONE", "COPY"} 869 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 870 871 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 872 873 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 874 875 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 876 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 877 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 878 879 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 880 881 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 882 883 DISTINCT_TOKENS = {TokenType.DISTINCT} 884 885 NULL_TOKENS = {TokenType.NULL} 886 887 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 888 889 STRICT_CAST = True 890 891 # A NULL arg in CONCAT yields NULL by default 892 CONCAT_NULL_OUTPUTS_STRING = False 893 894 PREFIXED_PIVOT_COLUMNS = False 895 IDENTIFY_PIVOT_STRINGS = False 896 897 LOG_BASE_FIRST = True 898 LOG_DEFAULTS_TO_LN = False 899 900 # Whether or not ADD is present for each column added by ALTER TABLE 901 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 902 903 # Whether or not the table sample clause expects CSV syntax 904 TABLESAMPLE_CSV = False 905 906 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 907 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 908 909 # Whether the TRIM function expects the characters to trim as its first argument 910 TRIM_PATTERN_FIRST = False 911 912 __slots__ = ( 913 "error_level", 914 "error_message_context", 915 "max_errors", 916 "sql", 917 "errors", 918 "_tokens", 919 "_index", 920 "_curr", 921 "_next", 922 "_prev", 923 "_prev_comments", 924 "_tokenizer", 925 ) 926 927 # Autofilled 928 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 929 INDEX_OFFSET: int = 0 930 UNNEST_COLUMN_ONLY: bool = False 931 ALIAS_POST_TABLESAMPLE: bool = False 932 STRICT_STRING_CONCAT = False 933 SUPPORTS_USER_DEFINED_TYPES = True 934 NORMALIZE_FUNCTIONS = "upper" 935 NULL_ORDERING: str = "nulls_are_small" 936 SHOW_TRIE: t.Dict = {} 937 SET_TRIE: t.Dict = {} 938 FORMAT_MAPPING: t.Dict[str, str] = {} 939 FORMAT_TRIE: t.Dict = {} 940 TIME_MAPPING: t.Dict[str, str] = {} 941 TIME_TRIE: t.Dict = {} 942 943 def __init__( 944 self, 945 error_level: t.Optional[ErrorLevel] = None, 946 error_message_context: int = 100, 947 max_errors: int = 3, 948 ): 949 self.error_level = error_level or ErrorLevel.IMMEDIATE 950 self.error_message_context = error_message_context 951 self.max_errors = max_errors 952 self._tokenizer = self.TOKENIZER_CLASS() 953 self.reset() 954 955 def reset(self): 956 self.sql = "" 957 self.errors = [] 958 self._tokens = [] 959 self._index = 0 960 self._curr = None 961 self._next = None 962 self._prev = None 963 self._prev_comments = None 964 965 def parse( 966 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 967 ) -> t.List[t.Optional[exp.Expression]]: 968 """ 969 Parses a list of tokens and returns a list of syntax trees, one tree 970 per parsed SQL statement. 971 972 Args: 973 raw_tokens: The list of tokens. 974 sql: The original SQL string, used to produce helpful debug messages. 975 976 Returns: 977 The list of the produced syntax trees. 
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the candidate types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Split the token stream into statement chunks on semicolons, then run
        # `parse_method` over each chunk, producing one (possibly None) tree per
        # statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from -1 onto the first token of the chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
self._advance() 1044 1045 expressions.append(parse_method(self)) 1046 1047 if self._index < len(self._tokens): 1048 self.raise_error("Invalid expression / Unexpected token") 1049 1050 self.check_errors() 1051 1052 return expressions 1053 1054 def check_errors(self) -> None: 1055 """Logs or raises any found errors, depending on the chosen error level setting.""" 1056 if self.error_level == ErrorLevel.WARN: 1057 for error in self.errors: 1058 logger.error(str(error)) 1059 elif self.error_level == ErrorLevel.RAISE and self.errors: 1060 raise ParseError( 1061 concat_messages(self.errors, self.max_errors), 1062 errors=merge_errors(self.errors), 1063 ) 1064 1065 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1066 """ 1067 Appends an error in the list of recorded errors or raises it, depending on the chosen 1068 error level setting. 1069 """ 1070 token = token or self._curr or self._prev or Token.string("") 1071 start = token.start 1072 end = token.end + 1 1073 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1074 highlight = self.sql[start:end] 1075 end_context = self.sql[end : end + self.error_message_context] 1076 1077 error = ParseError.new( 1078 f"{message}. Line {token.line}, Col: {token.col}.\n" 1079 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1080 description=message, 1081 line=token.line, 1082 col=token.col, 1083 start_context=start_context, 1084 highlight=highlight, 1085 end_context=end_context, 1086 ) 1087 1088 if self.error_level == ErrorLevel.IMMEDIATE: 1089 raise error 1090 1091 self.errors.append(error) 1092 1093 def expression( 1094 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1095 ) -> E: 1096 """ 1097 Creates a new, validated Expression. 1098 1099 Args: 1100 exp_class: The expression class to instantiate. 1101 comments: An optional list of comments to attach to the expression. 
    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move comments buffered from the previously consumed token onto
        # `expression`, clearing the buffer so they are attached only once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # The slice of the original SQL text spanning both tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the token cursor `times` positions forward and refresh the
        # current/next/previous token views and the previous token's comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Jump to an absolute token index; routed through _advance so all
        # derived cursor state stays consistent.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token's text and the rest of the statement
        # into a generic Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT ON <kind> <target> IS <string> statement; falls back
        to a generic Command when the target kind is not a known creatable."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a (possibly schema-qualified) table name into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into a MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression, optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregates> is only valid when a group is present.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single statement, dispatching on the current token; returns
        None when there are no tokens left."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a known statement keyword: try a bare expression, then a SELECT,
        # and wrap whichever parsed with set operations / query modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement; falls back to a generic Command when the
        dropped object kind is not a known creatable."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement; falls back to a generic Command when the
        created object kind is not recognized."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties may appear in several positions; accumulate them all
            # into a single Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: each value below consumes tokens as a side effect, so the
        # dict-literal evaluation order is significant.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, dispatching to a registered property parser
        when possible; otherwise tries a generic `key = value` assignment."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value`; backtrack entirely if there's no '='.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )
self._parse_column() 1454 1455 if not self._match(TokenType.EQ): 1456 self._retreat(index) 1457 return None 1458 1459 return self.expression( 1460 exp.Property, 1461 this=key.to_dot() if isinstance(key, exp.Column) else key, 1462 value=self._parse_column() or self._parse_var(any_token=True), 1463 ) 1464 1465 def _parse_stored(self) -> exp.FileFormatProperty: 1466 self._match(TokenType.ALIAS) 1467 1468 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1469 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1470 1471 return self.expression( 1472 exp.FileFormatProperty, 1473 this=self.expression( 1474 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1475 ) 1476 if input_format or output_format 1477 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1478 ) 1479 1480 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1481 self._match(TokenType.EQ) 1482 self._match(TokenType.ALIAS) 1483 return self.expression(exp_class, this=self._parse_field()) 1484 1485 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1486 properties = [] 1487 while True: 1488 if before: 1489 prop = self._parse_property_before() 1490 else: 1491 prop = self._parse_property() 1492 1493 if not prop: 1494 break 1495 for p in ensure_list(prop): 1496 properties.append(p) 1497 1498 if properties: 1499 return self.expression(exp.Properties, expressions=properties) 1500 1501 return None 1502 1503 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1504 return self.expression( 1505 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1506 ) 1507 1508 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1509 if self._index >= 2: 1510 pre_volatile_token = self._tokens[self._index - 2] 1511 else: 1512 pre_volatile_token = None 1513 1514 if pre_volatile_token and 
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Dispatch the various `WITH <property>` forms."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # Parses DEFINER = user@host; returns None unless both parts are found.
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse WITH JOURNAL TABLE = <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parse a [NO] LOG property."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JournalProperty from modifiers matched by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM [=] {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self) -> exp.Cluster:
        """Parse a CLUSTER BY list of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (...) [SORTED BY (...)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY GRANTS; if GRANTS doesn't follow, back off the COPY token too.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either `= <n> [PERCENT]` or bare with the
        NO/DEFAULT modifiers matched by the caller."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [=] <n> [BYTES | KBYTES | KILOBYTES] with the
        DEFAULT/MIN/MAX modifiers matched by the caller."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION [=] {ALWAYS | MANUAL | NEVER | DEFAULT}
        [AUTOTEMP <schema>]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL|INSERT|NONE}]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: target kind, optional target name, FOR/IN,
        a lock type, and an optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
if self._match_text_seq("ACCESS"): 1709 lock_type = "ACCESS" 1710 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1711 lock_type = "EXCLUSIVE" 1712 elif self._match_text_seq("SHARE"): 1713 lock_type = "SHARE" 1714 elif self._match_text_seq("READ"): 1715 lock_type = "READ" 1716 elif self._match_text_seq("WRITE"): 1717 lock_type = "WRITE" 1718 elif self._match_text_seq("CHECKSUM"): 1719 lock_type = "CHECKSUM" 1720 else: 1721 lock_type = None 1722 1723 override = self._match_text_seq("OVERRIDE") 1724 1725 return self.expression( 1726 exp.LockingProperty, 1727 this=this, 1728 kind=kind, 1729 for_or_in=for_or_in, 1730 lock_type=lock_type, 1731 override=override, 1732 ) 1733 1734 def _parse_partition_by(self) -> t.List[exp.Expression]: 1735 if self._match(TokenType.PARTITION_BY): 1736 return self._parse_csv(self._parse_conjunction) 1737 return [] 1738 1739 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1740 self._match(TokenType.EQ) 1741 return self.expression( 1742 exp.PartitionedByProperty, 1743 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1744 ) 1745 1746 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1747 if self._match_text_seq("AND", "STATISTICS"): 1748 statistics = True 1749 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1750 statistics = False 1751 else: 1752 statistics = None 1753 1754 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1755 1756 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1757 if self._match_text_seq("PRIMARY", "INDEX"): 1758 return exp.NoPrimaryIndexProperty() 1759 return None 1760 1761 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1762 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1763 return exp.OnCommitProperty() 1764 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1765 return exp.OnCommitProperty(delete=True) 1766 return self.expression(exp.OnProperty, 
    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option> ...]; returns
        None when an option keyword isn't followed by an identifier."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<identifiers>)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either a scalar type, or TABLE<...> /
        TABLE (<schema>) for table-valued signatures."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (including INSERT OVERWRITE [LOCAL]
        DIRECTORY and INSERT OR <alternative> variants)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        # NOTE: the keyword arguments below consume tokens as side effects, so
        # their evaluation order mirrors the clause order in the statement.
        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse an ON CONFLICT ... or ON DUPLICATE KEY ... clause of INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause; None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Entry point when only FORMAT remains to be matched (ROW was consumed).
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)] or
        ROW FORMAT DELIMITED with its terminator options."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
self.expression( 1948 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1949 ) 1950 1951 self._match_text_seq("DELIMITED") 1952 1953 kwargs = {} 1954 1955 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1956 kwargs["fields"] = self._parse_string() 1957 if self._match_text_seq("ESCAPED", "BY"): 1958 kwargs["escaped"] = self._parse_string() 1959 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1960 kwargs["collection_items"] = self._parse_string() 1961 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1962 kwargs["map_keys"] = self._parse_string() 1963 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1964 kwargs["lines"] = self._parse_string() 1965 if self._match_text_seq("NULL", "DEFINED", "AS"): 1966 kwargs["null"] = self._parse_string() 1967 1968 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1969 1970 def _parse_load(self) -> exp.LoadData | exp.Command: 1971 if self._match_text_seq("DATA"): 1972 local = self._match_text_seq("LOCAL") 1973 self._match_text_seq("INPATH") 1974 inpath = self._parse_string() 1975 overwrite = self._match(TokenType.OVERWRITE) 1976 self._match_pair(TokenType.INTO, TokenType.TABLE) 1977 1978 return self.expression( 1979 exp.LoadData, 1980 this=self._parse_table(schema=True), 1981 local=local, 1982 overwrite=overwrite, 1983 inpath=inpath, 1984 partition=self._parse_partition(), 1985 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1986 serde=self._match_text_seq("SERDE") and self._parse_string(), 1987 ) 1988 return self._parse_as_command(self._prev) 1989 1990 def _parse_delete(self) -> exp.Delete: 1991 # This handles MySQL's "Multiple-Table Syntax" 1992 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1993 tables = None 1994 comments = self._prev_comments 1995 if not self._match(TokenType.FROM, advance=False): 1996 tables = self._parse_csv(self._parse_table) or None 1997 1998 returning = self._parse_returning() 
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (target table, SET list, and tail clauses)."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear before FROM (some dialects) or at the end.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>` (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS <select>]` (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<expr>, ...)`."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, either parenthesized or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTEs, SELECT, parenthesized subqueries, VALUES.

        Args:
            nested: Allow a parenthesized nested select `( ... )`.
            table: Allow a bare table reference inside parentheses.
            parse_subquery_alias: Whether to consume an alias after a subquery.

        Returns:
            The parsed expression, possibly wrapped by set operations, or None.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # Statement cannot carry a WITH clause; report and fall back
                # to the bare CTE so non-raising error levels still proceed.
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            # A leading FROM (duckdb) takes precedence over a trailing one.
            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare leading FROM with no SELECT: treat as SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list.

        Args:
            skip_with_token: When True, assume WITH was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs (e.g. `WITH a AS (...), WITH b AS (...)`).
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `<alias> [ (cols) ] AS ( <statement> )`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] alias [ (col, ...) ]`; returns None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, they weren't a column list —
            # rewind so the caller can reinterpret them.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery, consuming trailing pivots and optionally an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and tail modifiers (WHERE/GROUP/.../LIMIT) to `this`.

        Only applies when `this` is one of self.MODIFIABLES; otherwise it is
        returned untouched.
        """
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # `LIMIT x, y` style: hoist the embedded offset into
                            # a proper Offset node on the query itself.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
advance=False): 2255 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2256 key, expression = parser(self) 2257 2258 if expression: 2259 this.set(key, expression) 2260 if key == "limit": 2261 offset = expression.args.pop("offset", None) 2262 if offset: 2263 this.set("offset", exp.Offset(expression=offset)) 2264 continue 2265 break 2266 return this 2267 2268 def _parse_hint(self) -> t.Optional[exp.Hint]: 2269 if self._match(TokenType.HINT): 2270 hints = [] 2271 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2272 hints.extend(hint) 2273 2274 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2275 self.raise_error("Expected */ after HINT") 2276 2277 return self.expression(exp.Hint, expressions=hints) 2278 2279 return None 2280 2281 def _parse_into(self) -> t.Optional[exp.Into]: 2282 if not self._match(TokenType.INTO): 2283 return None 2284 2285 temp = self._match(TokenType.TEMPORARY) 2286 unlogged = self._match_text_seq("UNLOGGED") 2287 self._match(TokenType.TABLE) 2288 2289 return self.expression( 2290 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2291 ) 2292 2293 def _parse_from( 2294 self, joins: bool = False, skip_from_token: bool = False 2295 ) -> t.Optional[exp.From]: 2296 if not skip_from_token and not self._match(TokenType.FROM): 2297 return None 2298 2299 return self.expression( 2300 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2301 ) 2302 2303 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2304 if not self._match(TokenType.MATCH_RECOGNIZE): 2305 return None 2306 2307 self._match_l_paren() 2308 2309 partition = self._parse_partition_by() 2310 order = self._parse_order() 2311 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2312 2313 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2314 rows = exp.var("ONE ROW PER MATCH") 2315 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2316 text = 
"ALL ROWS PER MATCH" 2317 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2318 text += f" SHOW EMPTY MATCHES" 2319 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2320 text += f" OMIT EMPTY MATCHES" 2321 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2322 text += f" WITH UNMATCHED ROWS" 2323 rows = exp.var(text) 2324 else: 2325 rows = None 2326 2327 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2328 text = "AFTER MATCH SKIP" 2329 if self._match_text_seq("PAST", "LAST", "ROW"): 2330 text += f" PAST LAST ROW" 2331 elif self._match_text_seq("TO", "NEXT", "ROW"): 2332 text += f" TO NEXT ROW" 2333 elif self._match_text_seq("TO", "FIRST"): 2334 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2335 elif self._match_text_seq("TO", "LAST"): 2336 text += f" TO LAST {self._advance_any().text}" # type: ignore 2337 after = exp.var(text) 2338 else: 2339 after = None 2340 2341 if self._match_text_seq("PATTERN"): 2342 self._match_l_paren() 2343 2344 if not self._curr: 2345 self.raise_error("Expecting )", self._curr) 2346 2347 paren = 1 2348 start = self._curr 2349 2350 while self._curr and paren > 0: 2351 if self._curr.token_type == TokenType.L_PAREN: 2352 paren += 1 2353 if self._curr.token_type == TokenType.R_PAREN: 2354 paren -= 1 2355 2356 end = self._prev 2357 self._advance() 2358 2359 if paren > 0: 2360 self.raise_error("Expecting )", self._curr) 2361 2362 pattern = exp.var(self._find_sql(start, end)) 2363 else: 2364 pattern = None 2365 2366 define = ( 2367 self._parse_csv( 2368 lambda: self.expression( 2369 exp.Alias, 2370 alias=self._parse_id_var(any_token=True), 2371 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2372 ) 2373 ) 2374 if self._match_text_seq("DEFINE") 2375 else None 2376 ) 2377 2378 self._match_r_paren() 2379 2380 return self.expression( 2381 exp.MatchRecognize, 2382 partition_by=partition, 2383 order=order, 2384 measures=measures, 2385 rows=rows, 2386 after=after, 2387 pattern=pattern, 2388 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY and its joined expression."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY implies outer semantics; CROSS APPLY does not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery — try UNNEST, a function call, or a plain identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            # Consume dotted references (e.g. schema.func(...)).
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW (Hive): `table [AS] col1, col2, ...`
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional join method/side/kind tokens, returning them in that order."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and APPLY forms).

        Returns None (after rewinding) if no join construct is present.
        """
        if self._match(TokenType.COMMA):
            # Comma join: `FROM a, b`
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens weren't part of a join — rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins like `a JOIN b JOIN c ON ... ON ...`:
            # speculatively parse an inner join, keeping it only if a
            # condition for the outer join follows.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class name."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            # The next word is a keyword that follows an index column, not an opclass.
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse a CREATE INDEX body, or — when `index` is given — its ON-table tail."""
        if index:
            # The index name was already parsed by the caller.
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # Teradata-style keyword

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints: T-SQL `WITH (...)` or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (identifier, string, etc.)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift the parts left: what we had becomes catalog/db.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or named table.

        Args:
            schema: Parse the result as a schema object (column defs allowed).
            joins: Also consume trailing JOINs.
            alias_tokens: Token set permitted as alias names.
            parse_bracket: Allow a leading bracket expression as the table.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect flag: some dialects put TABLESAMPLE before the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
self.expression(exp.Table, this=bracket) if bracket else None 2640 this = t.cast( 2641 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2642 ) 2643 2644 if schema: 2645 return self._parse_schema(this=this) 2646 2647 version = self._parse_version() 2648 2649 if version: 2650 this.set("version", version) 2651 2652 if self.ALIAS_POST_TABLESAMPLE: 2653 table_sample = self._parse_table_sample() 2654 2655 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2656 if alias: 2657 this.set("alias", alias) 2658 2659 if self._match_text_seq("AT"): 2660 this.set("index", self._parse_id_var()) 2661 2662 this.set("hints", self._parse_table_hints()) 2663 2664 if not this.args.get("pivots"): 2665 this.set("pivots", self._parse_pivots()) 2666 2667 if not self.ALIAS_POST_TABLESAMPLE: 2668 table_sample = self._parse_table_sample() 2669 2670 if table_sample: 2671 table_sample.set("this", this) 2672 this = table_sample 2673 2674 if joins: 2675 for join in iter(self._parse_join, None): 2676 this.append("joins", join) 2677 2678 return this 2679 2680 def _parse_version(self) -> t.Optional[exp.Version]: 2681 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2682 this = "TIMESTAMP" 2683 elif self._match(TokenType.VERSION_SNAPSHOT): 2684 this = "VERSION" 2685 else: 2686 return None 2687 2688 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2689 kind = self._prev.text.upper() 2690 start = self._parse_bitwise() 2691 self._match_texts(("TO", "AND")) 2692 end = self._parse_bitwise() 2693 expression: t.Optional[exp.Expression] = self.expression( 2694 exp.Tuple, expressions=[start, end] 2695 ) 2696 elif self._match_text_seq("CONTAINED", "IN"): 2697 kind = "CONTAINED IN" 2698 expression = self.expression( 2699 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2700 ) 2701 elif self._match(TokenType.ALL): 2702 kind = "ALL" 2703 expression = None 2704 else: 2705 self._match_text_seq("AS", "OF") 2706 kind = "AS 
OF" 2707 expression = self._parse_type() 2708 2709 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2710 2711 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2712 if not self._match(TokenType.UNNEST): 2713 return None 2714 2715 expressions = self._parse_wrapped_csv(self._parse_type) 2716 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2717 2718 alias = self._parse_table_alias() if with_alias else None 2719 2720 if alias: 2721 if self.UNNEST_COLUMN_ONLY: 2722 if alias.args.get("columns"): 2723 self.raise_error("Unexpected extra column alias in unnest.") 2724 2725 alias.set("columns", [alias.this]) 2726 alias.set("this", None) 2727 2728 columns = alias.args.get("columns") or [] 2729 if offset and len(expressions) < len(columns): 2730 offset = columns.pop() 2731 2732 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2733 self._match(TokenType.ALIAS) 2734 offset = self._parse_id_var( 2735 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2736 ) or exp.to_identifier("offset") 2737 2738 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2739 2740 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2741 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2742 if not is_derived and not self._match(TokenType.VALUES): 2743 return None 2744 2745 expressions = self._parse_csv(self._parse_value) 2746 alias = self._parse_table_alias() 2747 2748 if is_derived: 2749 self._match_r_paren() 2750 2751 return self.expression( 2752 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2753 ) 2754 2755 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2756 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2757 as_modifier and self._match_text_seq("USING", "SAMPLE") 2758 ): 2759 return None 2760 2761 bucket_numerator = None 2762 bucket_denominator = None 2763 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse zero or more consecutive JOIN clauses."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse duckdb's simplified PIVOT statement (ON / USING / GROUP BY)."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause, synthesizing output column names.

        Returns None (after rewinding) if the construct is not actually a pivot.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without a body — rewind, not a pivot.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # An alias only follows if another PIVOT/UNPIVOT doesn't come next.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the pivoted output column names from the IN-list
            # values combined with each aggregation's alias.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialect-overridable hook)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` assumes WHERE was consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with GROUPING SETS / ROLLUP / CUBE / TOTALS extensions.

        Loops because expression lists and grouping constructs may interleave
        (e.g. `GROUP BY a, ROLLUP (b), c`).
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # `WITH ROLLUP` / `WITH CUBE` (flag form) vs `ROLLUP (cols)` (list form).
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `GROUPING SETS ( ... )` as a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; `skip_having_token` assumes HAVING was consumed."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle hierarchical `START WITH ... CONNECT BY ...` (either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a function-like keyword inside CONNECT BY, so the
        # parser entry is registered temporarily and removed right after.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also follow CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)
start = self._parse_conjunction() 3011 3012 return self.expression(exp.Connect, start=start, connect=connect) 3013 3014 def _parse_order( 3015 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3016 ) -> t.Optional[exp.Expression]: 3017 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3018 return this 3019 3020 return self.expression( 3021 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3022 ) 3023 3024 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3025 if not self._match(token): 3026 return None 3027 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3028 3029 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3030 this = parse_method() if parse_method else self._parse_conjunction() 3031 3032 asc = self._match(TokenType.ASC) 3033 desc = self._match(TokenType.DESC) or (asc and False) 3034 3035 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3036 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3037 3038 nulls_first = is_nulls_first or False 3039 explicitly_null_ordered = is_nulls_first or is_nulls_last 3040 3041 if ( 3042 not explicitly_null_ordered 3043 and ( 3044 (not desc and self.NULL_ORDERING == "nulls_are_small") 3045 or (desc and self.NULL_ORDERING != "nulls_are_small") 3046 ) 3047 and self.NULL_ORDERING != "nulls_are_last" 3048 ): 3049 nulls_first = True 3050 3051 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3052 3053 def _parse_limit( 3054 self, this: t.Optional[exp.Expression] = None, top: bool = False 3055 ) -> t.Optional[exp.Expression]: 3056 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3057 comments = self._prev_comments 3058 if top: 3059 limit_paren = self._match(TokenType.L_PAREN) 3060 expression = self._parse_number() 3061 3062 if limit_paren: 3063 self._match_r_paren() 3064 else: 3065 expression = self._parse_term() 
            # MySQL-style "LIMIT <offset>, <count>": the first operand is the offset.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        # Standard SQL FETCH [FIRST|NEXT] <count> [PERCENT] [ROW|ROWS] [ONLY | WITH TIES]
        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET <count> [ROW|ROWS] clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each optionally followed by OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; recursion on the right operand makes
        the resulting tree right-associative."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Set operations are DISTINCT by default unless ALL is given.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse one (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions (lowest binding precedence handled here)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, <=, >, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE/etc., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is modeled as NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all — rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of IN: UNNEST(...), (subquery), (expr, ...) or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes `query=`; anything else is a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the remainder of BETWEEN: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used by LIKE-style predicates)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            # Bare INTERVAL keyword with no operand — not an interval literal.
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split '5 day' into value '5' and unit var `day`.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ?? coalescing, and << / >> shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                # x ?? y is modeled as COALESCE(x, y).
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then a typed/at-time-zone expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse either an INTERVAL literal, a typed literal (e.g. DATE '...'), or a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this,
                                  data_type)
                # <TYPE> '<literal>' is sugar for CAST('<literal>' AS <TYPE>).
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was a misparse — rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` (plus optional trailing var) in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, backtracking on failure.

        check_func: when True, a parenthesized "type" followed by a string literal is
        rejected (it's more likely a function call, e.g. DATE('...')).
        """
        index = self._index

        # Teradata system UDT prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text to see if it's really a type keyword.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect a dotted UDT name, e.g. schema.my_type.
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized type parameters: STRUCT(...), ARRAY(...), ENUM(...), DECIMAL(p, s), ...
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still turn out to be a function call, e.g. DATE(...).
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket nesting: ARRAY<...>, MAP<...>, STRUCT<...>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional trailing value list, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            # INTERVAL <unit> TO <unit> span, e.g. INTERVAL YEAR TO MONTH.
            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # TYPE(...) '<string>' reads like a function call, not a type — reject.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: <name>[:] <type> (as a column definition)."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE <zone> suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any dot/cast/bracket operators applied to it."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    # Applies trailing column operators (dots, ::casts, brackets) to a parsed field.
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: a.b.c — previous parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, '.5'-style number, or a parenthesized
        expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching through the parser's function tables.

        functions: override for self.FUNCTIONS (name -> builder).
        anonymous: build exp.Anonymous instead of a known function expression.
        optional_parens: allow paren-less functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as <kind>.<name>."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr) or fall back to a regular
        expression / DISTINCT argument, with optional ORDER BY / LIMIT modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a normal argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list); a nested SELECT means
        this is not a schema, so the input is returned untouched."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL].
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with one value or a wrapped list."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS [IDENTITY (...)| <expr>]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <n> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse constraints introduced by NOT: NULL, CASESPECIFIC, FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint, or fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint via CONSTRAINT_PARSERS; quoted identifiers are
        never constraint keywords."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(cols)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options (ON <event> <action>, DEFERRABLE, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event (e.g. DELETE/UPDATE) is taken verbatim from the next token.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> [options] clause."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is never populated here — referenced columns end
        # up inside the schema parsed by _parse_table(schema=True).
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one PRIMARY KEY column part."""
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [ ... ] subscripts / array literals and { ... } struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Open-ended slice: [:expr].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: adjust for the dialect's index base before building Bracket.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional slice suffix, e.g. the `:end` of `start:end`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END (CASE already consumed)."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either the function form IF(...) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] (NEXT already consumed)."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Just the word NEXT — put it back.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also tolerating a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <col>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT ...])."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = 
self._parse_at_time_zone(fmt_string) 4161 4162 if to.this in exp.DataType.TEMPORAL_TYPES: 4163 this = self.expression( 4164 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4165 this=this, 4166 format=exp.Literal.string( 4167 format_time( 4168 fmt_string.this if fmt_string else "", 4169 self.FORMAT_MAPPING or self.TIME_MAPPING, 4170 self.FORMAT_TRIE or self.TIME_TRIE, 4171 ) 4172 ), 4173 ) 4174 4175 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4176 this.set("zone", fmt.args["zone"]) 4177 4178 return this 4179 4180 return self.expression( 4181 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4182 ) 4183 4184 def _parse_concat(self) -> t.Optional[exp.Expression]: 4185 args = self._parse_csv(self._parse_conjunction) 4186 if self.CONCAT_NULL_OUTPUTS_STRING: 4187 args = self._ensure_string_if_null(args) 4188 4189 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4190 # we find such a call we replace it with its argument. 
4191 if len(args) == 1: 4192 return args[0] 4193 4194 return self.expression( 4195 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4196 ) 4197 4198 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4199 args = self._parse_csv(self._parse_conjunction) 4200 if len(args) < 2: 4201 return self.expression(exp.ConcatWs, expressions=args) 4202 delim, *values = args 4203 if self.CONCAT_NULL_OUTPUTS_STRING: 4204 values = self._ensure_string_if_null(values) 4205 4206 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4207 4208 def _parse_string_agg(self) -> exp.Expression: 4209 if self._match(TokenType.DISTINCT): 4210 args: t.List[t.Optional[exp.Expression]] = [ 4211 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4212 ] 4213 if self._match(TokenType.COMMA): 4214 args.extend(self._parse_csv(self._parse_conjunction)) 4215 else: 4216 args = self._parse_csv(self._parse_conjunction) # type: ignore 4217 4218 index = self._index 4219 if not self._match(TokenType.R_PAREN) and args: 4220 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4221 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4222 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4223 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4224 4225 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4226 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4227 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4228 if not self._match_text_seq("WITHIN", "GROUP"): 4229 self._retreat(index) 4230 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4231 4232 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4233 order = self._parse_order(this=seq_get(args, 0)) 4234 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4235 4236 def _parse_convert( 4237 self, strict: bool, safe: t.Optional[bool] = None 4238 ) -> t.Optional[exp.Expression]: 4239 this = self._parse_bitwise() 4240 4241 if self._match(TokenType.USING): 4242 to: t.Optional[exp.Expression] = self.expression( 4243 exp.CharacterSet, this=self._parse_var() 4244 ) 4245 elif self._match(TokenType.COMMA): 4246 to = self._parse_types() 4247 else: 4248 to = None 4249 4250 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4251 4252 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4253 """ 4254 There are generally two variants of the DECODE function: 4255 4256 - DECODE(bin, charset) 4257 - DECODE(expression, search, result [, search, result] ... [, default]) 4258 4259 The second variant will always be parsed into a CASE expression. Note that NULL 4260 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4261 instead of relying on pattern matching. 
4262 """ 4263 args = self._parse_csv(self._parse_conjunction) 4264 4265 if len(args) < 3: 4266 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4267 4268 expression, *expressions = args 4269 if not expression: 4270 return None 4271 4272 ifs = [] 4273 for search, result in zip(expressions[::2], expressions[1::2]): 4274 if not search or not result: 4275 return None 4276 4277 if isinstance(search, exp.Literal): 4278 ifs.append( 4279 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4280 ) 4281 elif isinstance(search, exp.Null): 4282 ifs.append( 4283 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4284 ) 4285 else: 4286 cond = exp.or_( 4287 exp.EQ(this=expression.copy(), expression=search), 4288 exp.and_( 4289 exp.Is(this=expression.copy(), expression=exp.Null()), 4290 exp.Is(this=search.copy(), expression=exp.Null()), 4291 copy=False, 4292 ), 4293 copy=False, 4294 ) 4295 ifs.append(exp.If(this=cond, true=result)) 4296 4297 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4298 4299 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4300 self._match_text_seq("KEY") 4301 key = self._parse_column() 4302 self._match_set((TokenType.COLON, TokenType.COMMA)) 4303 self._match_text_seq("VALUE") 4304 value = self._parse_bitwise() 4305 4306 if not key and not value: 4307 return None 4308 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4309 4310 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4311 if not this or not self._match_text_seq("FORMAT", "JSON"): 4312 return this 4313 4314 return self.expression(exp.FormatJson, this=this) 4315 4316 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4317 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4318 for value in values: 4319 if self._match_text_seq(value, "ON", on): 4320 return f"{value} ON {on}" 4321 4322 return None 4323 4324 def _parse_json_object(self) -> exp.JSONObject: 4325 star = self._parse_star() 4326 expressions = ( 4327 [star] 4328 if star 4329 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4330 ) 4331 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4332 4333 unique_keys = None 4334 if self._match_text_seq("WITH", "UNIQUE"): 4335 unique_keys = True 4336 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4337 unique_keys = False 4338 4339 self._match_text_seq("KEYS") 4340 4341 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4342 self._parse_type() 4343 ) 4344 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4345 4346 return self.expression( 4347 exp.JSONObject, 4348 expressions=expressions, 4349 null_handling=null_handling, 4350 unique_keys=unique_keys, 4351 return_type=return_type, 4352 encoding=encoding, 4353 ) 4354 4355 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4356 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4357 if not self._match_text_seq("NESTED"): 4358 this = self._parse_id_var() 4359 kind = self._parse_types(allow_identifiers=False) 4360 nested = None 4361 else: 4362 this = None 4363 kind = None 4364 nested = True 4365 4366 path = self._match_text_seq("PATH") and self._parse_string() 4367 nested_schema = nested and self._parse_json_schema() 4368 4369 return self.expression( 4370 exp.JSONColumnDef, 4371 this=this, 4372 kind=kind, 4373 path=path, 4374 nested_schema=nested_schema, 4375 ) 4376 4377 def _parse_json_schema(self) -> exp.JSONSchema: 4378 self._match_text_seq("COLUMNS") 4379 return self.expression( 4380 exp.JSONSchema, 4381 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4382 ) 4383 4384 def _parse_json_table(self) -> 
exp.JSONTable: 4385 this = self._parse_format_json(self._parse_bitwise()) 4386 path = self._match(TokenType.COMMA) and self._parse_string() 4387 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4388 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4389 schema = self._parse_json_schema() 4390 4391 return exp.JSONTable( 4392 this=this, 4393 schema=schema, 4394 path=path, 4395 error_handling=error_handling, 4396 empty_handling=empty_handling, 4397 ) 4398 4399 def _parse_logarithm(self) -> exp.Func: 4400 # Default argument order is base, expression 4401 args = self._parse_csv(self._parse_range) 4402 4403 if len(args) > 1: 4404 if not self.LOG_BASE_FIRST: 4405 args.reverse() 4406 return exp.Log.from_arg_list(args) 4407 4408 return self.expression( 4409 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4410 ) 4411 4412 def _parse_match_against(self) -> exp.MatchAgainst: 4413 expressions = self._parse_csv(self._parse_column) 4414 4415 self._match_text_seq(")", "AGAINST", "(") 4416 4417 this = self._parse_string() 4418 4419 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4420 modifier = "IN NATURAL LANGUAGE MODE" 4421 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4422 modifier = f"{modifier} WITH QUERY EXPANSION" 4423 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4424 modifier = "IN BOOLEAN MODE" 4425 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4426 modifier = "WITH QUERY EXPANSION" 4427 else: 4428 modifier = None 4429 4430 return self.expression( 4431 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4432 ) 4433 4434 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4435 def _parse_open_json(self) -> exp.OpenJSON: 4436 this = self._parse_bitwise() 4437 path = self._match(TokenType.COMMA) and self._parse_string() 4438 4439 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4440 this = 
self._parse_field(any_token=True) 4441 kind = self._parse_types() 4442 path = self._parse_string() 4443 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4444 4445 return self.expression( 4446 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4447 ) 4448 4449 expressions = None 4450 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4451 self._match_l_paren() 4452 expressions = self._parse_csv(_parse_open_json_column_def) 4453 4454 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4455 4456 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4457 args = self._parse_csv(self._parse_bitwise) 4458 4459 if self._match(TokenType.IN): 4460 return self.expression( 4461 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4462 ) 4463 4464 if haystack_first: 4465 haystack = seq_get(args, 0) 4466 needle = seq_get(args, 1) 4467 else: 4468 needle = seq_get(args, 0) 4469 haystack = seq_get(args, 1) 4470 4471 return self.expression( 4472 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4473 ) 4474 4475 def _parse_predict(self) -> exp.Predict: 4476 self._match_text_seq("MODEL") 4477 this = self._parse_table() 4478 4479 self._match(TokenType.COMMA) 4480 self._match_text_seq("TABLE") 4481 4482 return self.expression( 4483 exp.Predict, 4484 this=this, 4485 expression=self._parse_table(), 4486 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4487 ) 4488 4489 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4490 args = self._parse_csv(self._parse_table) 4491 return exp.JoinHint(this=func_name.upper(), expressions=args) 4492 4493 def _parse_substring(self) -> exp.Substring: 4494 # Postgres supports the form: substring(string [from int] [for int]) 4495 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4496 4497 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes after an expression: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS, and OVER (...) / named windows.

        Args:
            this: the expression the window modifiers attach to.
            alias: True when parsing a named window definition (WINDOW w AS (...)),
                in which case no OVER keyword is expected.

        Returns:
            `this` (possibly wrapped) when no window clause follows, otherwise
            an exp.Window (or Filter/WithinGroup-wrapped expression).
        """
        # FILTER (WHERE ...) — aggregate filter clause.
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            # Named window definition: consume an optional AS, no OVER keyword.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent) follows — nothing more to do.
            return this
        else:
            over = self._prev.text.upper()

        # OVER window_name (no parens): reference to a named window.
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        # OVER (base_window_name ... ) — optional base window reference.
        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FIRST/LAST (Oracle KEEP DENSE_RANK FIRST/LAST syntax).
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        # Frame kind: ROWS or RANGE; falsy when no frame spec is present.
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4698 return self._parse_placeholder() 4699 4700 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4701 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4702 4703 def _parse_number(self) -> t.Optional[exp.Expression]: 4704 if self._match(TokenType.NUMBER): 4705 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4706 return self._parse_placeholder() 4707 4708 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4709 if self._match(TokenType.IDENTIFIER): 4710 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4711 return self._parse_placeholder() 4712 4713 def _parse_var( 4714 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4715 ) -> t.Optional[exp.Expression]: 4716 if ( 4717 (any_token and self._advance_any()) 4718 or self._match(TokenType.VAR) 4719 or (self._match_set(tokens) if tokens else False) 4720 ): 4721 return self.expression(exp.Var, this=self._prev.text) 4722 return self._parse_placeholder() 4723 4724 def _advance_any(self) -> t.Optional[Token]: 4725 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4726 self._advance() 4727 return self._prev 4728 return None 4729 4730 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4731 return self._parse_var() or self._parse_string() 4732 4733 def _parse_null(self) -> t.Optional[exp.Expression]: 4734 if self._match_set(self.NULL_TOKENS): 4735 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4736 return self._parse_placeholder() 4737 4738 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4739 if self._match(TokenType.TRUE): 4740 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4741 if self._match(TokenType.FALSE): 4742 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4743 return self._parse_placeholder() 4744 4745 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4746 if self._match(TokenType.STAR): 4747 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4748 return self._parse_placeholder() 4749 4750 def _parse_parameter(self) -> exp.Parameter: 4751 wrapped = self._match(TokenType.L_BRACE) 4752 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4753 self._match(TokenType.R_BRACE) 4754 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4755 4756 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4757 if self._match_set(self.PLACEHOLDER_PARSERS): 4758 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4759 if placeholder: 4760 return placeholder 4761 self._advance(-1) 4762 return None 4763 4764 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4765 if not self._match(TokenType.EXCEPT): 4766 return None 4767 if self._match(TokenType.L_PAREN, advance=False): 4768 return self._parse_wrapped_csv(self._parse_column) 4769 4770 except_column = self._parse_column() 4771 return [except_column] if except_column else None 4772 4773 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4774 if not self._match(TokenType.REPLACE): 4775 return None 4776 if self._match(TokenType.L_PAREN, advance=False): 4777 return self._parse_wrapped_csv(self._parse_expression) 4778 4779 replace_expression = self._parse_expression() 4780 return [replace_expression] if replace_expression else None 4781 4782 def _parse_csv( 4783 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4784 ) -> t.List[exp.Expression]: 4785 parse_result = parse_method() 4786 items = [parse_result] if parse_result is not None else [] 4787 4788 while self._match(sep): 4789 self._add_comments(parse_result) 4790 parse_result = parse_method() 4791 if parse_result is not None: 4792 items.append(parse_result) 4793 4794 return items 4795 4796 def _parse_tokens( 4797 self, parse_method: t.Callable, expressions: t.Dict 4798 ) -> 
t.Optional[exp.Expression]: 4799 this = parse_method() 4800 4801 while self._match_set(expressions): 4802 this = self.expression( 4803 expressions[self._prev.token_type], 4804 this=this, 4805 comments=self._prev_comments, 4806 expression=parse_method(), 4807 ) 4808 4809 return this 4810 4811 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4812 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4813 4814 def _parse_wrapped_csv( 4815 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4816 ) -> t.List[exp.Expression]: 4817 return self._parse_wrapped( 4818 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4819 ) 4820 4821 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4822 wrapped = self._match(TokenType.L_PAREN) 4823 if not wrapped and not optional: 4824 self.raise_error("Expecting (") 4825 parse_result = parse_method() 4826 if wrapped: 4827 self._match_r_paren() 4828 return parse_result 4829 4830 def _parse_expressions(self) -> t.List[exp.Expression]: 4831 return self._parse_csv(self._parse_expression) 4832 4833 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4834 return self._parse_select() or self._parse_set_operations( 4835 self._parse_expression() if alias else self._parse_conjunction() 4836 ) 4837 4838 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4839 return self._parse_query_modifiers( 4840 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4841 ) 4842 4843 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4844 this = None 4845 if self._match_texts(self.TRANSACTION_KIND): 4846 this = self._prev.text 4847 4848 self._match_texts({"TRANSACTION", "WORK"}) 4849 4850 modes = [] 4851 while True: 4852 mode = [] 4853 while self._match(TokenType.VAR): 4854 mode.append(self._prev.text) 4855 4856 if mode: 4857 modes.append(" 
".join(mode)) 4858 if not self._match(TokenType.COMMA): 4859 break 4860 4861 return self.expression(exp.Transaction, this=this, modes=modes) 4862 4863 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4864 chain = None 4865 savepoint = None 4866 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4867 4868 self._match_texts({"TRANSACTION", "WORK"}) 4869 4870 if self._match_text_seq("TO"): 4871 self._match_text_seq("SAVEPOINT") 4872 savepoint = self._parse_id_var() 4873 4874 if self._match(TokenType.AND): 4875 chain = not self._match_text_seq("NO") 4876 self._match_text_seq("CHAIN") 4877 4878 if is_rollback: 4879 return self.expression(exp.Rollback, savepoint=savepoint) 4880 4881 return self.expression(exp.Commit, chain=chain) 4882 4883 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4884 if not self._match_text_seq("ADD"): 4885 return None 4886 4887 self._match(TokenType.COLUMN) 4888 exists_column = self._parse_exists(not_=True) 4889 expression = self._parse_field_def() 4890 4891 if expression: 4892 expression.set("exists", exists_column) 4893 4894 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4895 if self._match_texts(("FIRST", "AFTER")): 4896 position = self._prev.text 4897 column_position = self.expression( 4898 exp.ColumnPosition, this=self._parse_column(), position=position 4899 ) 4900 expression.set("position", column_position) 4901 4902 return expression 4903 4904 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4905 drop = self._match(TokenType.DROP) and self._parse_drop() 4906 if drop and not isinstance(drop, exp.Command): 4907 drop.set("kind", drop.args.get("kind", "COLUMN")) 4908 return drop 4909 4910 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4911 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4912 return self.expression( 4913 exp.DropPartition, 
    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ALTER TABLE ADD [CONSTRAINT] clause into an AddConstraint node.

        ``self._prev`` is the token that dispatched here (CONSTRAINT, FOREIGN KEY
        or PRIMARY KEY), which determines how the body is parsed.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # The key kind may either be the dispatching token itself or follow the
        # CONSTRAINT <name> prefix, hence the `kind == ... or self._match(...)`.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... ADD (constraints or columns)."""
        # Rewind point one token before the current one, so a failed branch can
        # re-parse from before the token that dispatched here.
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        # Some dialects allow ADD without a COLUMN keyword per field.
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> ... into an AlterColumn node."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Remaining form: [SET DATA] TYPE <dtype> [COLLATE <term>] [USING <expr>]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind (including any consumed IF EXISTS) and
        # parse as a column drop instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement.

        Only ALTER TABLE with a recognized action (see ALTER_PARSERS) and no
        trailing tokens yields a structured AlterTable; anything else falls back
        to an opaque Command wrapping the raw SQL.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only commit to a structured node if all input was consumed;
            # otherwise fall through to the Command fallback below.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE [INTO] ... USING ... ON ... statement with WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, and False when
            # neither qualifier is present (the final _match_text_seq result).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # WHEN ... THEN INSERT * (star form)
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # WHEN ... THEN INSERT (cols) VALUES (vals)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # WHEN ... THEN UPDATE * (star form)
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # WHEN ... THEN UPDATE SET a = b, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via a dialect-specific sub-parser, else as a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form ``left = right`` / ``left TO right``.

        Returns None (with the cursor restored) if no assignment is present.
        """
        index = self._index

        # GLOBAL/SESSION TRANSACTION is a transaction-characteristics SET item.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it must be passed via **kwargs.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item, dispatching through SET_PARSERS when possible."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first multi-word option that matches, else None."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the raw SQL into the leading keyword and the rest.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property like LAYOUT(...) / SOURCE(...).

        Args:
            this: the property name to attach (e.g. "LAYOUT", "SOURCE").
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Inner parenthesized list of `key value` sub-properties.
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a range property like LIFETIME(MIN x MAX y) or LIFETIME(y).

        When MIN is omitted, the single value is the max and min defaults to 0.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        """Parse ``<expr> FOR <x> IN <iter> [IF <cond>]``; None if IN is absent."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Retreat past the FOR token as well, not just the parsed column.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens and return the matching sub-parser.

        Restores the cursor and returns None when no full key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Return True and (optionally) advance if the current token has the
        # given type; attach pending comments to `expression` on a match.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but succeeds if the current token type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; advances past both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis, raising a parse error otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis, raising a parse error otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive match of the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitive match of a sequence of token texts; restores the
        # cursor fully on failure (and also on success when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters with plain identifiers.

        Args:
            node: the parsed lambda body (may be None).
            lambda_variables: names bound by the enclosing lambda.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost Dot chain rooted at this column, or the
                # column itself (possibly the whole node) when not dotted.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        # Wrap each value in COALESCE(CAST(value AS TEXT), '') so NULLs become
        # empty strings; falsy entries are dropped.
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat, alternating key/value argument list.

    A single ``*`` argument yields a ``StarMap``; otherwise the arguments are
    paired off as (key, value) and wrapped into two parallel arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair off the flat argument list: each even-positioned argument is a key
    # and the argument immediately after it is the corresponding value.
    pairs = [(args[position], args[position + 1]) for position in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.UDECIMAL, 183 TokenType.BIGDECIMAL, 184 TokenType.UUID, 185 TokenType.GEOGRAPHY, 186 TokenType.GEOMETRY, 187 TokenType.HLLSKETCH, 188 TokenType.HSTORE, 189 TokenType.PSEUDO_TYPE, 190 TokenType.SUPER, 191 TokenType.SERIAL, 192 TokenType.SMALLSERIAL, 193 TokenType.BIGSERIAL, 194 TokenType.XML, 195 TokenType.YEAR, 196 TokenType.UNIQUEIDENTIFIER, 197 TokenType.USERDEFINED, 198 TokenType.MONEY, 199 TokenType.SMALLMONEY, 200 TokenType.ROWVERSION, 201 TokenType.IMAGE, 202 TokenType.VARIANT, 203 TokenType.OBJECT, 204 TokenType.OBJECT_IDENTIFIER, 205 TokenType.INET, 206 TokenType.IPADDRESS, 207 TokenType.IPPREFIX, 208 TokenType.UNKNOWN, 209 TokenType.NULL, 210 *ENUM_TYPE_TOKENS, 211 
*NESTED_TYPE_TOKENS, 212 } 213 214 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 215 TokenType.BIGINT: TokenType.UBIGINT, 216 TokenType.INT: TokenType.UINT, 217 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 218 TokenType.SMALLINT: TokenType.USMALLINT, 219 TokenType.TINYINT: TokenType.UTINYINT, 220 TokenType.DECIMAL: TokenType.UDECIMAL, 221 } 222 223 SUBQUERY_PREDICATES = { 224 TokenType.ANY: exp.Any, 225 TokenType.ALL: exp.All, 226 TokenType.EXISTS: exp.Exists, 227 TokenType.SOME: exp.Any, 228 } 229 230 RESERVED_KEYWORDS = { 231 *Tokenizer.SINGLE_TOKENS.values(), 232 TokenType.SELECT, 233 } 234 235 DB_CREATABLES = { 236 TokenType.DATABASE, 237 TokenType.SCHEMA, 238 TokenType.TABLE, 239 TokenType.VIEW, 240 TokenType.MODEL, 241 TokenType.DICTIONARY, 242 } 243 244 CREATABLES = { 245 TokenType.COLUMN, 246 TokenType.FUNCTION, 247 TokenType.INDEX, 248 TokenType.PROCEDURE, 249 *DB_CREATABLES, 250 } 251 252 # Tokens that can represent identifiers 253 ID_VAR_TOKENS = { 254 TokenType.VAR, 255 TokenType.ANTI, 256 TokenType.APPLY, 257 TokenType.ASC, 258 TokenType.AUTO_INCREMENT, 259 TokenType.BEGIN, 260 TokenType.CACHE, 261 TokenType.CASE, 262 TokenType.COLLATE, 263 TokenType.COMMAND, 264 TokenType.COMMENT, 265 TokenType.COMMIT, 266 TokenType.CONSTRAINT, 267 TokenType.DEFAULT, 268 TokenType.DELETE, 269 TokenType.DESC, 270 TokenType.DESCRIBE, 271 TokenType.DICTIONARY, 272 TokenType.DIV, 273 TokenType.END, 274 TokenType.EXECUTE, 275 TokenType.ESCAPE, 276 TokenType.FALSE, 277 TokenType.FIRST, 278 TokenType.FILTER, 279 TokenType.FORMAT, 280 TokenType.FULL, 281 TokenType.IS, 282 TokenType.ISNULL, 283 TokenType.INTERVAL, 284 TokenType.KEEP, 285 TokenType.KILL, 286 TokenType.LEFT, 287 TokenType.LOAD, 288 TokenType.MERGE, 289 TokenType.NATURAL, 290 TokenType.NEXT, 291 TokenType.OFFSET, 292 TokenType.ORDINALITY, 293 TokenType.OVERLAPS, 294 TokenType.OVERWRITE, 295 TokenType.PARTITION, 296 TokenType.PERCENT, 297 TokenType.PIVOT, 298 TokenType.PRAGMA, 299 TokenType.RANGE, 300 TokenType.REFERENCES, 301 
TokenType.RIGHT, 302 TokenType.ROW, 303 TokenType.ROWS, 304 TokenType.SEMI, 305 TokenType.SET, 306 TokenType.SETTINGS, 307 TokenType.SHOW, 308 TokenType.TEMPORARY, 309 TokenType.TOP, 310 TokenType.TRUE, 311 TokenType.UNIQUE, 312 TokenType.UNPIVOT, 313 TokenType.UPDATE, 314 TokenType.VOLATILE, 315 TokenType.WINDOW, 316 *CREATABLES, 317 *SUBQUERY_PREDICATES, 318 *TYPE_TOKENS, 319 *NO_PAREN_FUNCTIONS, 320 } 321 322 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 323 324 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 325 TokenType.ANTI, 326 TokenType.APPLY, 327 TokenType.ASOF, 328 TokenType.FULL, 329 TokenType.LEFT, 330 TokenType.LOCK, 331 TokenType.NATURAL, 332 TokenType.OFFSET, 333 TokenType.RIGHT, 334 TokenType.SEMI, 335 TokenType.WINDOW, 336 } 337 338 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 339 340 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 341 342 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 343 344 FUNC_TOKENS = { 345 TokenType.COLLATE, 346 TokenType.COMMAND, 347 TokenType.CURRENT_DATE, 348 TokenType.CURRENT_DATETIME, 349 TokenType.CURRENT_TIMESTAMP, 350 TokenType.CURRENT_TIME, 351 TokenType.CURRENT_USER, 352 TokenType.FILTER, 353 TokenType.FIRST, 354 TokenType.FORMAT, 355 TokenType.GLOB, 356 TokenType.IDENTIFIER, 357 TokenType.INDEX, 358 TokenType.ISNULL, 359 TokenType.ILIKE, 360 TokenType.INSERT, 361 TokenType.LIKE, 362 TokenType.MERGE, 363 TokenType.OFFSET, 364 TokenType.PRIMARY_KEY, 365 TokenType.RANGE, 366 TokenType.REPLACE, 367 TokenType.RLIKE, 368 TokenType.ROW, 369 TokenType.UNNEST, 370 TokenType.VAR, 371 TokenType.LEFT, 372 TokenType.RIGHT, 373 TokenType.DATE, 374 TokenType.DATETIME, 375 TokenType.TABLE, 376 TokenType.TIMESTAMP, 377 TokenType.TIMESTAMPTZ, 378 TokenType.WINDOW, 379 TokenType.XOR, 380 *TYPE_TOKENS, 381 *SUBQUERY_PREDICATES, 382 } 383 384 CONJUNCTION = { 385 TokenType.AND: exp.And, 386 TokenType.OR: exp.Or, 387 } 388 389 EQUALITY = { 390 TokenType.EQ: exp.EQ, 391 TokenType.NEQ: exp.NEQ, 392 
TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 393 } 394 395 COMPARISON = { 396 TokenType.GT: exp.GT, 397 TokenType.GTE: exp.GTE, 398 TokenType.LT: exp.LT, 399 TokenType.LTE: exp.LTE, 400 } 401 402 BITWISE = { 403 TokenType.AMP: exp.BitwiseAnd, 404 TokenType.CARET: exp.BitwiseXor, 405 TokenType.PIPE: exp.BitwiseOr, 406 TokenType.DPIPE: exp.DPipe, 407 } 408 409 TERM = { 410 TokenType.DASH: exp.Sub, 411 TokenType.PLUS: exp.Add, 412 TokenType.MOD: exp.Mod, 413 TokenType.COLLATE: exp.Collate, 414 } 415 416 FACTOR = { 417 TokenType.DIV: exp.IntDiv, 418 TokenType.LR_ARROW: exp.Distance, 419 TokenType.SLASH: exp.Div, 420 TokenType.STAR: exp.Mul, 421 } 422 423 TIMES = { 424 TokenType.TIME, 425 TokenType.TIMETZ, 426 } 427 428 TIMESTAMPS = { 429 TokenType.TIMESTAMP, 430 TokenType.TIMESTAMPTZ, 431 TokenType.TIMESTAMPLTZ, 432 *TIMES, 433 } 434 435 SET_OPERATIONS = { 436 TokenType.UNION, 437 TokenType.INTERSECT, 438 TokenType.EXCEPT, 439 } 440 441 JOIN_METHODS = { 442 TokenType.NATURAL, 443 TokenType.ASOF, 444 } 445 446 JOIN_SIDES = { 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.FULL, 450 } 451 452 JOIN_KINDS = { 453 TokenType.INNER, 454 TokenType.OUTER, 455 TokenType.CROSS, 456 TokenType.SEMI, 457 TokenType.ANTI, 458 } 459 460 JOIN_HINTS: t.Set[str] = set() 461 462 LAMBDAS = { 463 TokenType.ARROW: lambda self, expressions: self.expression( 464 exp.Lambda, 465 this=self._replace_lambda( 466 self._parse_conjunction(), 467 {node.name for node in expressions}, 468 ), 469 expressions=expressions, 470 ), 471 TokenType.FARROW: lambda self, expressions: self.expression( 472 exp.Kwarg, 473 this=exp.var(expressions[0].name), 474 expression=self._parse_conjunction(), 475 ), 476 } 477 478 COLUMN_OPERATORS = { 479 TokenType.DOT: None, 480 TokenType.DCOLON: lambda self, this, to: self.expression( 481 exp.Cast if self.STRICT_CAST else exp.TryCast, 482 this=this, 483 to=to, 484 ), 485 TokenType.ARROW: lambda self, this, path: self.expression( 486 exp.JSONExtract, 487 this=this, 488 
expression=path, 489 ), 490 TokenType.DARROW: lambda self, this, path: self.expression( 491 exp.JSONExtractScalar, 492 this=this, 493 expression=path, 494 ), 495 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 496 exp.JSONBExtract, 497 this=this, 498 expression=path, 499 ), 500 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 501 exp.JSONBExtractScalar, 502 this=this, 503 expression=path, 504 ), 505 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 506 exp.JSONBContains, 507 this=this, 508 expression=key, 509 ), 510 } 511 512 EXPRESSION_PARSERS = { 513 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 514 exp.Column: lambda self: self._parse_column(), 515 exp.Condition: lambda self: self._parse_conjunction(), 516 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 517 exp.Expression: lambda self: self._parse_statement(), 518 exp.From: lambda self: self._parse_from(), 519 exp.Group: lambda self: self._parse_group(), 520 exp.Having: lambda self: self._parse_having(), 521 exp.Identifier: lambda self: self._parse_id_var(), 522 exp.Join: lambda self: self._parse_join(), 523 exp.Lambda: lambda self: self._parse_lambda(), 524 exp.Lateral: lambda self: self._parse_lateral(), 525 exp.Limit: lambda self: self._parse_limit(), 526 exp.Offset: lambda self: self._parse_offset(), 527 exp.Order: lambda self: self._parse_order(), 528 exp.Ordered: lambda self: self._parse_ordered(), 529 exp.Properties: lambda self: self._parse_properties(), 530 exp.Qualify: lambda self: self._parse_qualify(), 531 exp.Returning: lambda self: self._parse_returning(), 532 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 533 exp.Table: lambda self: self._parse_table_parts(), 534 exp.TableAlias: lambda self: self._parse_table_alias(), 535 exp.Where: lambda self: self._parse_where(), 536 exp.Window: lambda self: self._parse_named_window(), 537 exp.With: lambda self: self._parse_with(), 538 
"JOIN_TYPE": lambda self: self._parse_join_parts(), 539 } 540 541 STATEMENT_PARSERS = { 542 TokenType.ALTER: lambda self: self._parse_alter(), 543 TokenType.BEGIN: lambda self: self._parse_transaction(), 544 TokenType.CACHE: lambda self: self._parse_cache(), 545 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 546 TokenType.COMMENT: lambda self: self._parse_comment(), 547 TokenType.CREATE: lambda self: self._parse_create(), 548 TokenType.DELETE: lambda self: self._parse_delete(), 549 TokenType.DESC: lambda self: self._parse_describe(), 550 TokenType.DESCRIBE: lambda self: self._parse_describe(), 551 TokenType.DROP: lambda self: self._parse_drop(), 552 TokenType.INSERT: lambda self: self._parse_insert(), 553 TokenType.KILL: lambda self: self._parse_kill(), 554 TokenType.LOAD: lambda self: self._parse_load(), 555 TokenType.MERGE: lambda self: self._parse_merge(), 556 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 557 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 558 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 559 TokenType.SET: lambda self: self._parse_set(), 560 TokenType.UNCACHE: lambda self: self._parse_uncache(), 561 TokenType.UPDATE: lambda self: self._parse_update(), 562 TokenType.USE: lambda self: self.expression( 563 exp.Use, 564 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 565 and exp.var(self._prev.text), 566 this=self._parse_table(schema=False), 567 ), 568 } 569 570 UNARY_PARSERS = { 571 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 572 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 573 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 574 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 575 } 576 577 PRIMARY_PARSERS = { 578 TokenType.STRING: lambda self, token: self.expression( 579 exp.Literal, 
this=token.text, is_string=True 580 ), 581 TokenType.NUMBER: lambda self, token: self.expression( 582 exp.Literal, this=token.text, is_string=False 583 ), 584 TokenType.STAR: lambda self, _: self.expression( 585 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 586 ), 587 TokenType.NULL: lambda self, _: self.expression(exp.Null), 588 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 589 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 590 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 591 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 592 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 593 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 594 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 595 exp.National, this=token.text 596 ), 597 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 598 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 599 exp.RawString, this=token.text 600 ), 601 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 602 } 603 604 PLACEHOLDER_PARSERS = { 605 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 606 TokenType.PARAMETER: lambda self: self._parse_parameter(), 607 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 608 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 609 else None, 610 } 611 612 RANGE_PARSERS = { 613 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 614 TokenType.GLOB: binary_range_parser(exp.Glob), 615 TokenType.ILIKE: binary_range_parser(exp.ILike), 616 TokenType.IN: lambda self, this: self._parse_in(this), 617 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 618 TokenType.IS: lambda self, this: 
self._parse_is(this), 619 TokenType.LIKE: binary_range_parser(exp.Like), 620 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 621 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 622 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 623 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 624 } 625 626 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 627 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 628 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 629 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 630 "CHARACTER SET": lambda self: self._parse_character_set(), 631 "CHECKSUM": lambda self: self._parse_checksum(), 632 "CLUSTER BY": lambda self: self._parse_cluster(), 633 "CLUSTERED": lambda self: self._parse_clustered_by(), 634 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 635 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 636 "COPY": lambda self: self._parse_copy_property(), 637 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 638 "DEFINER": lambda self: self._parse_definer(), 639 "DETERMINISTIC": lambda self: self.expression( 640 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 641 ), 642 "DISTKEY": lambda self: self._parse_distkey(), 643 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 644 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 645 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 646 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 647 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 648 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "FREESPACE": lambda self: self._parse_freespace(), 650 "HEAP": lambda self: self.expression(exp.HeapProperty), 651 "IMMUTABLE": 
lambda self: self.expression( 652 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 653 ), 654 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 655 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 656 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 657 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 658 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 659 "LIKE": lambda self: self._parse_create_like(), 660 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 661 "LOCK": lambda self: self._parse_locking(), 662 "LOCKING": lambda self: self._parse_locking(), 663 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 664 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 665 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 666 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 667 "NO": lambda self: self._parse_no_property(), 668 "ON": lambda self: self._parse_on_property(), 669 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 670 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 671 "PARTITION BY": lambda self: self._parse_partitioned_by(), 672 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 673 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 674 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 675 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 676 "REMOTE": lambda self: self._parse_remote_with_connection(), 677 "RETURNS": lambda self: self._parse_returns(), 678 "ROW": lambda self: self._parse_row(), 679 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 680 "SAMPLE": lambda self: self.expression( 681 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 682 ), 683 
"SET": lambda self: self.expression(exp.SetProperty, multi=False), 684 "SETTINGS": lambda self: self.expression( 685 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 686 ), 687 "SORTKEY": lambda self: self._parse_sortkey(), 688 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 689 "STABLE": lambda self: self.expression( 690 exp.StabilityProperty, this=exp.Literal.string("STABLE") 691 ), 692 "STORED": lambda self: self._parse_stored(), 693 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 694 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 695 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 696 "TO": lambda self: self._parse_to_table(), 697 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 698 "TRANSFORM": lambda self: self.expression( 699 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 700 ), 701 "TTL": lambda self: self._parse_ttl(), 702 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 703 "VOLATILE": lambda self: self._parse_volatile_property(), 704 "WITH": lambda self: self._parse_with_property(), 705 } 706 707 CONSTRAINT_PARSERS = { 708 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 709 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 710 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 711 "CHARACTER SET": lambda self: self.expression( 712 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 713 ), 714 "CHECK": lambda self: self.expression( 715 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 716 ), 717 "COLLATE": lambda self: self.expression( 718 exp.CollateColumnConstraint, this=self._parse_var() 719 ), 720 "COMMENT": lambda self: self.expression( 721 exp.CommentColumnConstraint, this=self._parse_string() 722 ), 723 "COMPRESS": lambda self: self._parse_compress(), 724 
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> is a column constraint; a bare ON <id> falls back
        # to an OnProperty.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE <action> keyword -> parser callback.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear at the schema level without a CONSTRAINT <name> prefix.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Special-cased "functions" that are not followed by a parenthesized arg list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot be used as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function name -> dedicated parser, for functions with non-standard call syntax.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Token type -> ("modifier arg name", parsed modifier) for query modifiers.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        # FETCH is normalized into the "limit" slot as well.
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope> handlers; SET TRANSACTION has dedicated parsing.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Populated by dialects that support SHOW statements (used to build SHOW_TRIE).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Wraps a literal annotated with one of these types into a dedicated expression.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types onto which query modifiers (WHERE, LIMIT, ...) can be attached.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may precede VOLATILE in a CREATE statement (see _parse_volatile_property).
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    # Whether CAST raises on failure (dialects may override; see SAFE_CAST/TRY_CAST).
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state so this instance can be reused for another parse."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # None of the candidate types parsed successfully; surface all errors together.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the token stream on semicolons: each chunk is one statement.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens after the statement parser finished means a bad parse.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves any pending comments from the previous token onto the expression.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the raw SQL spanned by the two tokens, inclusive of `end`.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor forward (or backward for negative `times`).
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or advances) the cursor to the given absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token's text plus the rest as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON <kind> <target> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a statement keyword: parse as a bare expression or a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: keep the statement as an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates property lists parsed at different positions of the statement.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; rewind so the tokens can be re-parsed elsewhere.
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED [AS] [INPUTFORMAT <string>] [OUTPUTFORMAT <string>] | <format>
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # <PROPERTY> [= | AS] <field>
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties until one fails to parse.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is a table property; otherwise
        # it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM [=] ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (<cols>) [SORTED BY (<ordered>)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY not followed by GRANTS: give the COPY token back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION [=] ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (<schema>)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING <kind> [<target>] FOR|IN <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None
    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse an optional PARTITION BY clause; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] <schema or bracketed field> into a property node."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the AND [NO] STATISTICS tail of WITH [NO] DATA (Teradata).

        Args:
            no: True when the caller already consumed WITH NO DATA.
        """
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse what follows a NO keyword; only NO PRIMARY INDEX is recognized here."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse properties introduced by ON: ON COMMIT ... ROWS or a generic ON <schema>."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<identifier>) (Redshift-style)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse CREATE ... LIKE <table> [{INCLUDING | EXCLUDING} <option>]...

        Returns None when an INCLUDING/EXCLUDING keyword isn't followed by an identifier.
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<id>, ...) (Redshift-style)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <value>; `default` flags DEFAULT CHARACTER SET."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts> (BigQuery remote model property)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: RETURNS TABLE<...>, RETURNS TABLE(<schema>), or a type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> form
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<creatable kind>] <table> [<properties>]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (INTO <table> or Hive's OVERWRITE DIRECTORY form).

        NOTE: the keyword arguments of the final expression() call are evaluated in
        source order and each one consumes tokens, so their order is significant.
        """
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. SQLite's INSERT OR REPLACE / OR IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id> (MySQL)."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... DO ... (Postgres) or ON DUPLICATE KEY UPDATE (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None if RETURNING doesn't follow."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... when ROW was already consumed by the caller."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive's ROW FORMAT SERDE '<name>' or ROW FORMAT DELIMITED ... clauses.

        Args:
            match_row: when True, require (and consume) the leading ROW FORMAT tokens.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA ...; any other LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear either before or after the WHERE clause.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement; the dict args are evaluated (and consume tokens) in order."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table> (Spark)."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>] (Spark)."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<exprs>); None if the PARTITION keyword doesn't follow."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, either parenthesized or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: SELECT, parenthesized subquery, VALUES, or leading FROM.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table reference inside parentheses.
            parse_subquery_alias: whether to parse an alias on a returned subquery.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause; `skip_with_token` means WITH was already consumed."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # tolerate a redundant WITH between CTEs
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(col, ...)]; None when neither an alias nor columns appear."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # backtrack if the parenthesized list yielded nothing
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery with trailing pivots and optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals, and clause modifiers (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # a LIMIT x, y / LIMIT ... OFFSET carries its offset along
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
advance=False): 2256 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2257 key, expression = parser(self) 2258 2259 if expression: 2260 this.set(key, expression) 2261 if key == "limit": 2262 offset = expression.args.pop("offset", None) 2263 if offset: 2264 this.set("offset", exp.Offset(expression=offset)) 2265 continue 2266 break 2267 return this 2268 2269 def _parse_hint(self) -> t.Optional[exp.Hint]: 2270 if self._match(TokenType.HINT): 2271 hints = [] 2272 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2273 hints.extend(hint) 2274 2275 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2276 self.raise_error("Expected */ after HINT") 2277 2278 return self.expression(exp.Hint, expressions=hints) 2279 2280 return None 2281 2282 def _parse_into(self) -> t.Optional[exp.Into]: 2283 if not self._match(TokenType.INTO): 2284 return None 2285 2286 temp = self._match(TokenType.TEMPORARY) 2287 unlogged = self._match_text_seq("UNLOGGED") 2288 self._match(TokenType.TABLE) 2289 2290 return self.expression( 2291 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2292 ) 2293 2294 def _parse_from( 2295 self, joins: bool = False, skip_from_token: bool = False 2296 ) -> t.Optional[exp.From]: 2297 if not skip_from_token and not self._match(TokenType.FROM): 2298 return None 2299 2300 return self.expression( 2301 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2302 ) 2303 2304 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2305 if not self._match(TokenType.MATCH_RECOGNIZE): 2306 return None 2307 2308 self._match_l_paren() 2309 2310 partition = self._parse_partition_by() 2311 order = self._parse_order() 2312 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2313 2314 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2315 rows = exp.var("ONE ROW PER MATCH") 2316 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2317 text = 
"ALL ROWS PER MATCH" 2318 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2319 text += f" SHOW EMPTY MATCHES" 2320 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2321 text += f" OMIT EMPTY MATCHES" 2322 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2323 text += f" WITH UNMATCHED ROWS" 2324 rows = exp.var(text) 2325 else: 2326 rows = None 2327 2328 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2329 text = "AFTER MATCH SKIP" 2330 if self._match_text_seq("PAST", "LAST", "ROW"): 2331 text += f" PAST LAST ROW" 2332 elif self._match_text_seq("TO", "NEXT", "ROW"): 2333 text += f" TO NEXT ROW" 2334 elif self._match_text_seq("TO", "FIRST"): 2335 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2336 elif self._match_text_seq("TO", "LAST"): 2337 text += f" TO LAST {self._advance_any().text}" # type: ignore 2338 after = exp.var(text) 2339 else: 2340 after = None 2341 2342 if self._match_text_seq("PATTERN"): 2343 self._match_l_paren() 2344 2345 if not self._curr: 2346 self.raise_error("Expecting )", self._curr) 2347 2348 paren = 1 2349 start = self._curr 2350 2351 while self._curr and paren > 0: 2352 if self._curr.token_type == TokenType.L_PAREN: 2353 paren += 1 2354 if self._curr.token_type == TokenType.R_PAREN: 2355 paren -= 1 2356 2357 end = self._prev 2358 self._advance() 2359 2360 if paren > 0: 2361 self.raise_error("Expecting )", self._curr) 2362 2363 pattern = exp.var(self._find_sql(start, end)) 2364 else: 2365 pattern = None 2366 2367 define = ( 2368 self._parse_csv( 2369 lambda: self.expression( 2370 exp.Alias, 2371 alias=self._parse_id_var(any_token=True), 2372 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2373 ) 2374 ) 2375 if self._match_text_seq("DEFINE") 2376 else None 2377 ) 2378 2379 self._match_r_paren() 2380 2381 return self.expression( 2382 exp.MatchRecognize, 2383 partition_by=partition, 2384 order=order, 2385 measures=measures, 2386 rows=rows, 2387 after=after, 2388 pattern=pattern, 2389 
define=define, 2390 alias=self._parse_table_alias(), 2391 ) 2392 2393 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2394 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2395 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2396 2397 if outer_apply or cross_apply: 2398 this = self._parse_select(table=True) 2399 view = None 2400 outer = not cross_apply 2401 elif self._match(TokenType.LATERAL): 2402 this = self._parse_select(table=True) 2403 view = self._match(TokenType.VIEW) 2404 outer = self._match(TokenType.OUTER) 2405 else: 2406 return None 2407 2408 if not this: 2409 this = ( 2410 self._parse_unnest() 2411 or self._parse_function() 2412 or self._parse_id_var(any_token=False) 2413 ) 2414 2415 while self._match(TokenType.DOT): 2416 this = exp.Dot( 2417 this=this, 2418 expression=self._parse_function() or self._parse_id_var(any_token=False), 2419 ) 2420 2421 if view: 2422 table = self._parse_id_var(any_token=False) 2423 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2424 table_alias: t.Optional[exp.TableAlias] = self.expression( 2425 exp.TableAlias, this=table, columns=columns 2426 ) 2427 elif isinstance(this, exp.Subquery) and this.alias: 2428 # Ensures parity between the Subquery's and the Lateral's "alias" args 2429 table_alias = this.args["alias"].copy() 2430 else: 2431 table_alias = self._parse_table_alias() 2432 2433 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2434 2435 def _parse_join_parts( 2436 self, 2437 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2438 return ( 2439 self._match_set(self.JOIN_METHODS) and self._prev, 2440 self._match_set(self.JOIN_SIDES) and self._prev, 2441 self._match_set(self.JOIN_KINDS) and self._prev, 2442 ) 2443 2444 def _parse_join( 2445 self, skip_join_token: bool = False, parse_bracket: bool = False 2446 ) -> t.Optional[exp.Join]: 2447 if self._match(TokenType.COMMA): 2448 return 
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class."""
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        Args:
            index: a pre-parsed index name, in which case only the ON <table> part is
                parsed; otherwise [UNIQUE] [PRIMARY] [AMP] INDEX <name> is expected.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, identifier, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, values, subquery, or a plain table.

        Args:
            schema: parse the table as a schema target (column defs allowed).
            joins: also consume any trailing JOIN clauses.
            alias_tokens: token set permitted as the table alias.
            parse_bracket: allow a leading bracket construct as the table.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect-dependent ordering of TABLESAMPLE relative to the alias;
        # exactly one of the two branches below assigns table_sample.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
self.expression(exp.Table, this=bracket) if bracket else None 2641 this = t.cast( 2642 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2643 ) 2644 2645 if schema: 2646 return self._parse_schema(this=this) 2647 2648 version = self._parse_version() 2649 2650 if version: 2651 this.set("version", version) 2652 2653 if self.ALIAS_POST_TABLESAMPLE: 2654 table_sample = self._parse_table_sample() 2655 2656 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2657 if alias: 2658 this.set("alias", alias) 2659 2660 if self._match_text_seq("AT"): 2661 this.set("index", self._parse_id_var()) 2662 2663 this.set("hints", self._parse_table_hints()) 2664 2665 if not this.args.get("pivots"): 2666 this.set("pivots", self._parse_pivots()) 2667 2668 if not self.ALIAS_POST_TABLESAMPLE: 2669 table_sample = self._parse_table_sample() 2670 2671 if table_sample: 2672 table_sample.set("this", this) 2673 this = table_sample 2674 2675 if joins: 2676 for join in iter(self._parse_join, None): 2677 this.append("joins", join) 2678 2679 return this 2680 2681 def _parse_version(self) -> t.Optional[exp.Version]: 2682 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2683 this = "TIMESTAMP" 2684 elif self._match(TokenType.VERSION_SNAPSHOT): 2685 this = "VERSION" 2686 else: 2687 return None 2688 2689 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2690 kind = self._prev.text.upper() 2691 start = self._parse_bitwise() 2692 self._match_texts(("TO", "AND")) 2693 end = self._parse_bitwise() 2694 expression: t.Optional[exp.Expression] = self.expression( 2695 exp.Tuple, expressions=[start, end] 2696 ) 2697 elif self._match_text_seq("CONTAINED", "IN"): 2698 kind = "CONTAINED IN" 2699 expression = self.expression( 2700 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2701 ) 2702 elif self._match(TokenType.ALL): 2703 kind = "ALL" 2704 expression = None 2705 else: 2706 self._match_text_seq("AS", "OF") 2707 kind = "AS 
OF" 2708 expression = self._parse_type() 2709 2710 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2711 2712 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2713 if not self._match(TokenType.UNNEST): 2714 return None 2715 2716 expressions = self._parse_wrapped_csv(self._parse_type) 2717 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2718 2719 alias = self._parse_table_alias() if with_alias else None 2720 2721 if alias: 2722 if self.UNNEST_COLUMN_ONLY: 2723 if alias.args.get("columns"): 2724 self.raise_error("Unexpected extra column alias in unnest.") 2725 2726 alias.set("columns", [alias.this]) 2727 alias.set("this", None) 2728 2729 columns = alias.args.get("columns") or [] 2730 if offset and len(expressions) < len(columns): 2731 offset = columns.pop() 2732 2733 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2734 self._match(TokenType.ALIAS) 2735 offset = self._parse_id_var( 2736 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2737 ) or exp.to_identifier("offset") 2738 2739 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2740 2741 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2742 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2743 if not is_derived and not self._match(TokenType.VALUES): 2744 return None 2745 2746 expressions = self._parse_csv(self._parse_value) 2747 alias = self._parse_table_alias() 2748 2749 if is_derived: 2750 self._match_r_paren() 2751 2752 return self.expression( 2753 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2754 ) 2755 2756 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2757 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2758 as_modifier and self._match_text_seq("USING", "SAMPLE") 2759 ): 2760 return None 2761 2762 bucket_numerator = None 2763 bucket_denominator = None 2764 
bucket_field = None 2765 percent = None 2766 rows = None 2767 size = None 2768 seed = None 2769 2770 kind = ( 2771 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2772 ) 2773 method = self._parse_var(tokens=(TokenType.ROW,)) 2774 2775 matched_l_paren = self._match(TokenType.L_PAREN) 2776 2777 if self.TABLESAMPLE_CSV: 2778 num = None 2779 expressions = self._parse_csv(self._parse_primary) 2780 else: 2781 expressions = None 2782 num = ( 2783 self._parse_factor() 2784 if self._match(TokenType.NUMBER, advance=False) 2785 else self._parse_primary() 2786 ) 2787 2788 if self._match_text_seq("BUCKET"): 2789 bucket_numerator = self._parse_number() 2790 self._match_text_seq("OUT", "OF") 2791 bucket_denominator = bucket_denominator = self._parse_number() 2792 self._match(TokenType.ON) 2793 bucket_field = self._parse_field() 2794 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2795 percent = num 2796 elif self._match(TokenType.ROWS): 2797 rows = num 2798 elif num: 2799 size = num 2800 2801 if matched_l_paren: 2802 self._match_r_paren() 2803 2804 if self._match(TokenType.L_PAREN): 2805 method = self._parse_var() 2806 seed = self._match(TokenType.COMMA) and self._parse_number() 2807 self._match_r_paren() 2808 elif self._match_texts(("SEED", "REPEATABLE")): 2809 seed = self._parse_wrapped(self._parse_number) 2810 2811 return self.expression( 2812 exp.TableSample, 2813 expressions=expressions, 2814 method=method, 2815 bucket_numerator=bucket_numerator, 2816 bucket_denominator=bucket_denominator, 2817 bucket_field=bucket_field, 2818 percent=percent, 2819 rows=rows, 2820 size=size, 2821 seed=seed, 2822 kind=kind, 2823 ) 2824 2825 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2826 return list(iter(self._parse_pivot, None)) or None 2827 2828 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2829 return list(iter(self._parse_join, None)) or None 2830 2831 # https://duckdb.org/docs/sql/statements/pivot 2832 def 
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT syntax: PIVOT <table> ON ... USING ... GROUP BY ..."""

        def _parse_on() -> t.Optional[exp.Expression]:
            # Each ON item may carry an IN list restricting the pivoted values.
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause; returns None (after rewinding) if absent.

        For PIVOT, also synthesizes the output column names by combining aggregation
        aliases with the IN-list values (order controlled by PREFIXED_PIVOT_COLUMNS).
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause -- rewind to where we started.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only attach an alias if another PIVOT/UNPIVOT doesn't immediately follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default naming for pivot output columns: the aggregations' aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None when the WHERE keyword is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS modifiers; returns None when absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop so that mixed forms like `GROUP BY a, ROLLUP (b), c` are all collected.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` has no column list; plain `ROLLUP (...)` does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
elements["rollup"].extend(ensure_list(rollup)) 2957 2958 if self._match(TokenType.CUBE): 2959 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2960 elements["cube"].extend(ensure_list(cube)) 2961 2962 if self._match_text_seq("TOTALS"): 2963 totals = True 2964 elements["totals"] = True # type: ignore 2965 2966 if not (grouping_sets or rollup or cube or totals): 2967 break 2968 2969 return self.expression(exp.Group, **elements) # type: ignore 2970 2971 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2972 if not self._match(TokenType.GROUPING_SETS): 2973 return None 2974 2975 return self._parse_wrapped_csv(self._parse_grouping_set) 2976 2977 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2978 if self._match(TokenType.L_PAREN): 2979 grouping_set = self._parse_csv(self._parse_column) 2980 self._match_r_paren() 2981 return self.expression(exp.Tuple, expressions=grouping_set) 2982 2983 return self._parse_column() 2984 2985 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2986 if not skip_having_token and not self._match(TokenType.HAVING): 2987 return None 2988 return self.expression(exp.Having, this=self._parse_conjunction()) 2989 2990 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2991 if not self._match(TokenType.QUALIFY): 2992 return None 2993 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2994 2995 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2996 if skip_start_token: 2997 start = None 2998 elif self._match(TokenType.START_WITH): 2999 start = self._parse_conjunction() 3000 else: 3001 return None 3002 3003 self._match(TokenType.CONNECT_BY) 3004 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3005 exp.Prior, this=self._parse_bitwise() 3006 ) 3007 connect = self._parse_conjunction() 3008 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3009 3010 if not start and self._match(TokenType.START_WITH): 3011 
start = self._parse_conjunction() 3012 3013 return self.expression(exp.Connect, start=start, connect=connect) 3014 3015 def _parse_order( 3016 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3017 ) -> t.Optional[exp.Expression]: 3018 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3019 return this 3020 3021 return self.expression( 3022 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3023 ) 3024 3025 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3026 if not self._match(token): 3027 return None 3028 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3029 3030 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3031 this = parse_method() if parse_method else self._parse_conjunction() 3032 3033 asc = self._match(TokenType.ASC) 3034 desc = self._match(TokenType.DESC) or (asc and False) 3035 3036 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3037 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3038 3039 nulls_first = is_nulls_first or False 3040 explicitly_null_ordered = is_nulls_first or is_nulls_last 3041 3042 if ( 3043 not explicitly_null_ordered 3044 and ( 3045 (not desc and self.NULL_ORDERING == "nulls_are_small") 3046 or (desc and self.NULL_ORDERING != "nulls_are_small") 3047 ) 3048 and self.NULL_ORDERING != "nulls_are_last" 3049 ): 3050 nulls_first = True 3051 3052 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3053 3054 def _parse_limit( 3055 self, this: t.Optional[exp.Expression] = None, top: bool = False 3056 ) -> t.Optional[exp.Expression]: 3057 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3058 comments = self._prev_comments 3059 if top: 3060 limit_paren = self._match(TokenType.L_PAREN) 3061 expression = self._parse_number() 3062 3063 if limit_paren: 3064 self._match_r_paren() 3065 else: 3066 expression = self._parse_term() 
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET n [ROW|ROWS] clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE),
        each optionally with OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>, None = default.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; recursion makes them right-associative."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full projection expression, including an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    # The following three methods form the operator-precedence ladder:
    # conjunction (AND/OR) -> equality -> comparison -> range.
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL/NOTNULL, ...)
        on top of a bitwise expression, applying NOT negation where present."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL, TRUE/FALSE.

        Rewinds and returns None if what follows IS isn't a recognized operand.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a (sub)query or expression
        list, or a bare field reference."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery becomes `query`; anything else is an expression list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the `low AND high` tail of a BETWEEN predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward the canonical
        INTERVAL '<number>' <unit> form; rewinds and returns None on failure."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split a combined literal like '5 day' into value + unit.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, `??` coalescing, and << / >> shifts left-associatively."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (see TERM)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (see FACTOR)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then a typed/at-time-zone expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse either an INTERVAL, a typed literal/cast (e.g. DATE '2020-01-01'),
        or a plain column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `TYPE 'literal'` -- delegate to a dialect-specific literal parser
                # if one exists, otherwise treat it as a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with nothing following: it was a column after all.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` or `CHAR` in VARCHAR(10 CHAR)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, covering nested/struct/enum parameters, <...> generics,
        timestamp time-zone modifiers, INTERVAL spans, UNSIGNED, and [] array suffixes.

        When check_func is True, a parenthesized form followed by a string literal is
        assumed to be a function call rather than a type, and parsing is rolled back.
        Returns None (with the token index restored) if no type can be parsed.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Possibly an identifier naming a type (quoted type or UDT).
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized form could still be a function call -- checked below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit>, e.g. INTERVAL DAY TO SECOND.
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # A string follows, so this was a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name type` or `name: type` as a column def."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix; returns `this` unchanged when absent."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (promoting a bare identifier to a Column),
        then any chained column operators / brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators: `::` casts, dot-paths (building up
        table/db/catalog qualifiers), custom operators, and bracket subscripts."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Each dot shifts the existing qualifiers one level outward.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent-string
        concatenation), a leading-dot number like `.5`, or a parenthesized
        expression / subquery / tuple. Returns None when nothing matches."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL concatenates adjacent string literals: 'a' 'b' -> 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, trying in order: no-paren parsers, special
        FUNCTION_PARSERS, subquery predicates (EXISTS/ANY/...), known FUNCTIONS,
        and finally an Anonymous function node; returns None if not a function."""
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original-case name for round-tripping.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function/UDF signature as a column definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer like _utf8'abc'; falls back to an Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally scoped as `scope.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression like `(x, y) -> ...` or `x -> ...`; if no lambda
        arrow follows, rewind and parse DISTINCT / a regular select-or-expression,
        allowing trailing ORDER BY / LIMIT inside aggregate arguments."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all -- rewind and parse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list); a nested SELECT
        means there is no schema, in which case `this` is returned unchanged."""
        index = self._index

        if not self.errors:
            # Probe for a subquery first; any errors from the probe are discarded
            # and the token index restored.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field definition (column name plus optional type/constraints)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type, computed-column clause, and constraints of a column
        definition; returns the bare name when none of these are present."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: `name AS <expr> [PERSISTED] [NOT NULL]`.
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
-> t.Optional[exp.Expression]: 3765 # column defs are not really columns, they're identifiers 3766 if isinstance(this, exp.Column): 3767 this = this.this 3768 3769 kind = self._parse_types(schema=True) 3770 3771 if self._match_text_seq("FOR", "ORDINALITY"): 3772 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3773 3774 constraints: t.List[exp.Expression] = [] 3775 3776 if not kind and self._match(TokenType.ALIAS): 3777 constraints.append( 3778 self.expression( 3779 exp.ComputedColumnConstraint, 3780 this=self._parse_conjunction(), 3781 persisted=self._match_text_seq("PERSISTED"), 3782 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3783 ) 3784 ) 3785 3786 while True: 3787 constraint = self._parse_column_constraint() 3788 if not constraint: 3789 break 3790 constraints.append(constraint) 3791 3792 if not kind and not constraints: 3793 return this 3794 3795 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3796 3797 def _parse_auto_increment( 3798 self, 3799 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3800 start = None 3801 increment = None 3802 3803 if self._match(TokenType.L_PAREN, advance=False): 3804 args = self._parse_wrapped_csv(self._parse_bitwise) 3805 start = seq_get(args, 0) 3806 increment = seq_get(args, 1) 3807 elif self._match_text_seq("START"): 3808 start = self._parse_bitwise() 3809 self._match_text_seq("INCREMENT") 3810 increment = self._parse_bitwise() 3811 3812 if start and increment: 3813 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3814 3815 return exp.AutoIncrementColumnConstraint() 3816 3817 def _parse_compress(self) -> exp.CompressColumnConstraint: 3818 if self._match(TokenType.L_PAREN, advance=False): 3819 return self.expression( 3820 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3821 ) 3822 3823 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse GENERATED { ALWAYS | BY DEFAULT [ON NULL] } AS IDENTITY,
        including the optional parenthesized sequence options
        (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expression>): store the computed expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expression> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the word(s) after NOT in a column constraint:
        NULL, CASESPECIFIC, or FOR REPLICATION. Returns None otherwise."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <id>.

        Dispatches on self.CONSTRAINT_PARSERS keyed by the matched keyword.
        Returns the bare name (or None) if no known constraint follows.
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint.

        Without a CONSTRAINT keyword, only unnamed schema-level constraints
        (self.SCHEMA_UNNAMED_CONSTRAINTS) are accepted.
        """
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several constraint bodies / functions.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints`
        (default: all of self.CONSTRAINT_PARSERS); None when not matched.

        Raises a parse error if a keyword from `constraints` has no parser.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(<columns>)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings, e.g.
        "ON DELETE CASCADE", "NOT ENFORCED", "DEFERRABLE"."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (e.g. DELETE / UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> clause with its key-constraint options.

        When `match` is True the REFERENCES keyword is required.
        NOTE(review): `expressions` is always None here — presumably the
        referenced-column list is filled in elsewhere; verify against callers.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse a single element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint (no column list)
        or as a table-level constraint with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: subscript/slice ``x[...]``, array literal,
        or a DuckDB-style struct literal ``{...}``; recurses for chained
        brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:y].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index expressions are normalized to the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the optional ``: <expr>`` tail of a slice."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END,
        then any trailing window clause."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call ``IF(cond, true[, false])`` or
        as statement syntax ``IF cond THEN true [ELSE false] END``."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; restore the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (<order>)]; the NEXT token
        was already consumed by the caller."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<unit> FROM <expr>) — a comma is also accepted in
        place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> ...).

        Handles UDT identifiers, CHAR CHARACTER SET, and the FORMAT clause —
        a temporal type with FORMAT is rewritten to StrToDate / StrToTime.
        `strict` picks exp.Cast vs exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(<expr>, '<type string>') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal cast with a format becomes a string-to-time parse.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the arguments of CONCAT into exp.Concat / exp.SafeConcat."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(<delim>, <values>...); the first argument is the
        delimiter and is exempt from NULL-to-string coercion."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls into exp.GroupConcat,
        covering the Postgres, BigQuery and WITHIN GROUP variants."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Iterate (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE matches NULL against NULL, so use IS NULL explicitly.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: equal, or both sides NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse [KEY] <key> {: | ,} [VALUE] <value> inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...) with its NULL/UNIQUE KEYS,
        RETURNING and ENCODING clauses."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause,
        including NESTED [PATH] sub-schemas."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (<json column defs>) schema for JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of JSON_TABLE(...): source expression, path,
        ON ERROR / ON EMPTY handling, and the COLUMNS schema."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, honoring dialect argument order and the
        single-argument LN default."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(<cols>) AGAINST(<string> [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<expr> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: <name> <type> [<path>] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION / LOCATE-style arguments into exp.StrPosition.

        Supports both ``POSITION(needle IN haystack)`` and comma-separated
        forms; `haystack_first` controls the comma-form argument order.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL <table>, TABLE <table>
        [, <params struct>]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>) puts the pattern first; swap so that
            # `this` is always the string being trimmed.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named windows, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one ``<name> AS (<window spec>)`` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when the corresponding
        keywords follow; otherwise return it unchanged."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffixes of `this`: FILTER (WHERE ...),
        WITHIN GROUP, IGNORE/RESPECT NULLS, and the OVER (...) clause.

        With `alias=True` (WINDOW clause), `this` is a window name and AS is
        expected instead of an OVER-like keyword.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (no parenthesized spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus an
        optional side keyword (e.g. PRECEDING / FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias after `this`, including the parenthesized
        multi-alias form. With `explicit=True` the AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier.

        With `any_token=True` any non-reserved token is accepted; otherwise
        only tokens in `tokens` (default: self.ID_VAR_TOKENS).
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier, if present."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved / listed token) into exp.Var,
        falling back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR token or, failing that, a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a ``*`` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder via self.PLACEHOLDER_PARSERS, backtracking one
        token when the dispatched parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse EXCEPT (<columns>) or EXCEPT <column> as a column list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse REPLACE (<expressions>) or REPLACE <expression> as a list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None
        results and attaching comments found at each separator."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) ->
t.Optional[exp.Expression]: 4800 this = parse_method() 4801 4802 while self._match_set(expressions): 4803 this = self.expression( 4804 expressions[self._prev.token_type], 4805 this=this, 4806 comments=self._prev_comments, 4807 expression=parse_method(), 4808 ) 4809 4810 return this 4811 4812 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4813 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4814 4815 def _parse_wrapped_csv( 4816 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4817 ) -> t.List[exp.Expression]: 4818 return self._parse_wrapped( 4819 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4820 ) 4821 4822 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4823 wrapped = self._match(TokenType.L_PAREN) 4824 if not wrapped and not optional: 4825 self.raise_error("Expecting (") 4826 parse_result = parse_method() 4827 if wrapped: 4828 self._match_r_paren() 4829 return parse_result 4830 4831 def _parse_expressions(self) -> t.List[exp.Expression]: 4832 return self._parse_csv(self._parse_expression) 4833 4834 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4835 return self._parse_select() or self._parse_set_operations( 4836 self._parse_expression() if alias else self._parse_conjunction() 4837 ) 4838 4839 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4840 return self._parse_query_modifiers( 4841 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4842 ) 4843 4844 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4845 this = None 4846 if self._match_texts(self.TRANSACTION_KIND): 4847 this = self._prev.text 4848 4849 self._match_texts({"TRANSACTION", "WORK"}) 4850 4851 modes = [] 4852 while True: 4853 mode = [] 4854 while self._match(TokenType.VAR): 4855 mode.append(self._prev.text) 4856 4857 if mode: 4858 modes.append(" 
".join(mode)) 4859 if not self._match(TokenType.COMMA): 4860 break 4861 4862 return self.expression(exp.Transaction, this=this, modes=modes) 4863 4864 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4865 chain = None 4866 savepoint = None 4867 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4868 4869 self._match_texts({"TRANSACTION", "WORK"}) 4870 4871 if self._match_text_seq("TO"): 4872 self._match_text_seq("SAVEPOINT") 4873 savepoint = self._parse_id_var() 4874 4875 if self._match(TokenType.AND): 4876 chain = not self._match_text_seq("NO") 4877 self._match_text_seq("CHAIN") 4878 4879 if is_rollback: 4880 return self.expression(exp.Rollback, savepoint=savepoint) 4881 4882 return self.expression(exp.Commit, chain=chain) 4883 4884 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4885 if not self._match_text_seq("ADD"): 4886 return None 4887 4888 self._match(TokenType.COLUMN) 4889 exists_column = self._parse_exists(not_=True) 4890 expression = self._parse_field_def() 4891 4892 if expression: 4893 expression.set("exists", exists_column) 4894 4895 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4896 if self._match_texts(("FIRST", "AFTER")): 4897 position = self._prev.text 4898 column_position = self.expression( 4899 exp.ColumnPosition, this=self._parse_column(), position=position 4900 ) 4901 expression.set("position", column_position) 4902 4903 return expression 4904 4905 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4906 drop = self._match(TokenType.DROP) and self._parse_drop() 4907 if drop and not isinstance(drop, exp.Command): 4908 drop.set("kind", drop.args.get("kind", "COLUMN")) 4909 return drop 4910 4911 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4912 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4913 return self.expression( 4914 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists 4915 ) 4916 4917 def _parse_add_constraint(self) -> exp.AddConstraint: 4918 this = None 4919 kind = self._prev.token_type 4920 4921 if kind == TokenType.CONSTRAINT: 4922 this = self._parse_id_var() 4923 4924 if self._match_text_seq("CHECK"): 4925 expression = self._parse_wrapped(self._parse_conjunction) 4926 enforced = self._match_text_seq("ENFORCED") 4927 4928 return self.expression( 4929 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4930 ) 4931 4932 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4933 expression = self._parse_foreign_key() 4934 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4935 expression = self._parse_primary_key() 4936 else: 4937 expression = None 4938 4939 return self.expression(exp.AddConstraint, this=this, expression=expression) 4940 4941 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4942 index = self._index - 1 4943 4944 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4945 return self._parse_csv(self._parse_add_constraint) 4946 4947 self._retreat(index) 4948 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4949 return self._parse_csv(self._parse_field_def) 4950 4951 return self._parse_csv(self._parse_add_column) 4952 4953 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4954 self._match(TokenType.COLUMN) 4955 column = self._parse_field(any_token=True) 4956 4957 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4958 return self.expression(exp.AlterColumn, this=column, drop=True) 4959 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4960 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4961 4962 self._match_text_seq("SET", "DATA") 4963 return self.expression( 4964 exp.AlterColumn, 4965 this=column, 4966 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4967 collate=self._match(TokenType.COLLATE) 
and self._parse_term(), 4968 using=self._match(TokenType.USING) and self._parse_conjunction(), 4969 ) 4970 4971 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4972 index = self._index - 1 4973 4974 partition_exists = self._parse_exists() 4975 if self._match(TokenType.PARTITION, advance=False): 4976 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4977 4978 self._retreat(index) 4979 return self._parse_csv(self._parse_drop_column) 4980 4981 def _parse_alter_table_rename(self) -> exp.RenameTable: 4982 self._match_text_seq("TO") 4983 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4984 4985 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4986 start = self._prev 4987 4988 if not self._match(TokenType.TABLE): 4989 return self._parse_as_command(start) 4990 4991 exists = self._parse_exists() 4992 only = self._match_text_seq("ONLY") 4993 this = self._parse_table(schema=True) 4994 4995 if self._next: 4996 self._advance() 4997 4998 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4999 if parser: 5000 actions = ensure_list(parser(self)) 5001 5002 if not self._curr: 5003 return self.expression( 5004 exp.AlterTable, 5005 this=this, 5006 exists=exists, 5007 actions=actions, 5008 only=only, 5009 ) 5010 5011 return self._parse_as_command(start) 5012 5013 def _parse_merge(self) -> exp.Merge: 5014 self._match(TokenType.INTO) 5015 target = self._parse_table() 5016 5017 if target and self._match(TokenType.ALIAS, advance=False): 5018 target.set("alias", self._parse_table_alias()) 5019 5020 self._match(TokenType.USING) 5021 using = self._parse_table() 5022 5023 self._match(TokenType.ON) 5024 on = self._parse_conjunction() 5025 5026 whens = [] 5027 while self._match(TokenType.WHEN): 5028 matched = not self._match(TokenType.NOT) 5029 self._match_text_seq("MATCHED") 5030 source = ( 5031 False 5032 if self._match_text_seq("BY", "TARGET") 5033 else self._match_text_seq("BY", 
"SOURCE") 5034 ) 5035 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5036 5037 self._match(TokenType.THEN) 5038 5039 if self._match(TokenType.INSERT): 5040 _this = self._parse_star() 5041 if _this: 5042 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5043 else: 5044 then = self.expression( 5045 exp.Insert, 5046 this=self._parse_value(), 5047 expression=self._match(TokenType.VALUES) and self._parse_value(), 5048 ) 5049 elif self._match(TokenType.UPDATE): 5050 expressions = self._parse_star() 5051 if expressions: 5052 then = self.expression(exp.Update, expressions=expressions) 5053 else: 5054 then = self.expression( 5055 exp.Update, 5056 expressions=self._match(TokenType.SET) 5057 and self._parse_csv(self._parse_equality), 5058 ) 5059 elif self._match(TokenType.DELETE): 5060 then = self.expression(exp.Var, this=self._prev.text) 5061 else: 5062 then = None 5063 5064 whens.append( 5065 self.expression( 5066 exp.When, 5067 matched=matched, 5068 source=source, 5069 condition=condition, 5070 then=then, 5071 ) 5072 ) 5073 5074 return self.expression( 5075 exp.Merge, 5076 this=target, 5077 using=using, 5078 on=on, 5079 expressions=whens, 5080 ) 5081 5082 def _parse_show(self) -> t.Optional[exp.Expression]: 5083 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5084 if parser: 5085 return parser(self) 5086 return self._parse_as_command(self._prev) 5087 5088 def _parse_set_item_assignment( 5089 self, kind: t.Optional[str] = None 5090 ) -> t.Optional[exp.Expression]: 5091 index = self._index 5092 5093 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5094 return self._parse_set_transaction(global_=kind == "GLOBAL") 5095 5096 left = self._parse_primary() or self._parse_id_var() 5097 assignment_delimiter = self._match_texts(("=", "TO")) 5098 5099 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5100 self._retreat(index) 5101 return None 5102 5103 
right = self._parse_statement() or self._parse_id_var() 5104 this = self.expression(exp.EQ, this=left, expression=right) 5105 5106 return self.expression(exp.SetItem, this=this, kind=kind) 5107 5108 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5109 self._match_text_seq("TRANSACTION") 5110 characteristics = self._parse_csv( 5111 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5112 ) 5113 return self.expression( 5114 exp.SetItem, 5115 expressions=characteristics, 5116 kind="TRANSACTION", 5117 **{"global": global_}, # type: ignore 5118 ) 5119 5120 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5121 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5122 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5123 5124 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5125 index = self._index 5126 set_ = self.expression( 5127 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5128 ) 5129 5130 if self._curr: 5131 self._retreat(index) 5132 return self._parse_as_command(self._prev) 5133 5134 return set_ 5135 5136 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5137 for option in options: 5138 if self._match_text_seq(*option.split(" ")): 5139 return exp.var(option) 5140 return None 5141 5142 def _parse_as_command(self, start: Token) -> exp.Command: 5143 while self._curr: 5144 self._advance() 5145 text = self._find_sql(start, self._prev) 5146 size = len(start.text) 5147 return exp.Command(this=text[:size], expression=text[size:]) 5148 5149 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5150 settings = [] 5151 5152 self._match_l_paren() 5153 kind = self._parse_id_var() 5154 5155 if self._match(TokenType.L_PAREN): 5156 while True: 5157 key = self._parse_id_var() 5158 value = self._parse_primary() 5159 5160 if not key and value is None: 5161 break 5162 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5163 self._match(TokenType.R_PAREN) 5164 5165 self._match_r_paren() 5166 5167 return self.expression( 5168 exp.DictProperty, 5169 this=this, 5170 kind=kind.this if kind else None, 5171 settings=settings, 5172 ) 5173 5174 def _parse_dict_range(self, this: str) -> exp.DictRange: 5175 self._match_l_paren() 5176 has_min = self._match_text_seq("MIN") 5177 if has_min: 5178 min = self._parse_var() or self._parse_primary() 5179 self._match_text_seq("MAX") 5180 max = self._parse_var() or self._parse_primary() 5181 else: 5182 max = self._parse_var() or self._parse_primary() 5183 min = exp.Literal.number(0) 5184 self._match_r_paren() 5185 return self.expression(exp.DictRange, this=this, min=min, max=max) 5186 5187 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5188 index = self._index 5189 expression = self._parse_column() 5190 if not self._match(TokenType.IN): 5191 self._retreat(index - 1) 5192 return None 5193 iterator = self._parse_column() 5194 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5195 return self.expression( 5196 exp.Comprehension, 5197 this=this, 5198 expression=expression, 5199 iterator=iterator, 5200 condition=condition, 5201 ) 5202 5203 def _find_parser( 5204 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5205 ) -> t.Optional[t.Callable]: 5206 if not self._curr: 5207 return None 5208 5209 index = self._index 5210 this = [] 5211 while True: 5212 # The current token might be multiple words 5213 curr = self._curr.text.upper() 5214 key = curr.split(" ") 5215 this.append(curr) 5216 5217 self._advance() 5218 result, trie = in_trie(trie, key) 5219 if result == TrieResult.FAILED: 5220 break 5221 5222 if result == TrieResult.EXISTS: 5223 subparser = parsers[" ".join(this)] 5224 return subparser 5225 5226 self._retreat(index) 5227 return None 5228 5229 def _match(self, token_type, advance=True, expression=None): 
5230 if not self._curr: 5231 return None 5232 5233 if self._curr.token_type == token_type: 5234 if advance: 5235 self._advance() 5236 self._add_comments(expression) 5237 return True 5238 5239 return None 5240 5241 def _match_set(self, types, advance=True): 5242 if not self._curr: 5243 return None 5244 5245 if self._curr.token_type in types: 5246 if advance: 5247 self._advance() 5248 return True 5249 5250 return None 5251 5252 def _match_pair(self, token_type_a, token_type_b, advance=True): 5253 if not self._curr or not self._next: 5254 return None 5255 5256 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5257 if advance: 5258 self._advance(2) 5259 return True 5260 5261 return None 5262 5263 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5264 if not self._match(TokenType.L_PAREN, expression=expression): 5265 self.raise_error("Expecting (") 5266 5267 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5268 if not self._match(TokenType.R_PAREN, expression=expression): 5269 self.raise_error("Expecting )") 5270 5271 def _match_texts(self, texts, advance=True): 5272 if self._curr and self._curr.text.upper() in texts: 5273 if advance: 5274 self._advance() 5275 return True 5276 return False 5277 5278 def _match_text_seq(self, *texts, advance=True): 5279 index = self._index 5280 for text in texts: 5281 if self._curr and self._curr.text.upper() == text: 5282 self._advance() 5283 else: 5284 self._retreat(index) 5285 return False 5286 5287 if not advance: 5288 self._retreat(index) 5289 5290 return True 5291 5292 @t.overload 5293 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5294 ... 5295 5296 @t.overload 5297 def _replace_columns_with_dots( 5298 self, this: t.Optional[exp.Expression] 5299 ) -> t.Optional[exp.Expression]: 5300 ... 
5301 5302 def _replace_columns_with_dots(self, this): 5303 if isinstance(this, exp.Dot): 5304 exp.replace_children(this, self._replace_columns_with_dots) 5305 elif isinstance(this, exp.Column): 5306 exp.replace_children(this, self._replace_columns_with_dots) 5307 table = this.args.get("table") 5308 this = ( 5309 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5310 ) 5311 5312 return this 5313 5314 def _replace_lambda( 5315 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5316 ) -> t.Optional[exp.Expression]: 5317 if not node: 5318 return node 5319 5320 for column in node.find_all(exp.Column): 5321 if column.parts[0].name in lambda_variables: 5322 dot_or_id = column.to_dot() if column.table else column.this 5323 parent = column.parent 5324 5325 while isinstance(parent, exp.Dot): 5326 if not isinstance(parent.parent, exp.Dot): 5327 parent.replace(dot_or_id) 5328 break 5329 parent = parent.parent 5330 else: 5331 if column is node: 5332 node = dot_or_id 5333 else: 5334 column.replace(dot_or_id) 5335 return node 5336 5337 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5338 return [ 5339 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5340 for value in values 5341 if value 5342 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
944 def __init__( 945 self, 946 error_level: t.Optional[ErrorLevel] = None, 947 error_message_context: int = 100, 948 max_errors: int = 3, 949 ): 950 self.error_level = error_level or ErrorLevel.IMMEDIATE 951 self.error_message_context = error_message_context 952 self.max_errors = max_errors 953 self._tokenizer = self.TOKENIZER_CLASS() 954 self.reset()
966 def parse( 967 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 968 ) -> t.List[t.Optional[exp.Expression]]: 969 """ 970 Parses a list of tokens and returns a list of syntax trees, one tree 971 per parsed SQL statement. 972 973 Args: 974 raw_tokens: The list of tokens. 975 sql: The original SQL string, used to produce helpful debug messages. 976 977 Returns: 978 The list of the produced syntax trees. 979 """ 980 return self._parse( 981 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 982 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
984 def parse_into( 985 self, 986 expression_types: exp.IntoType, 987 raw_tokens: t.List[Token], 988 sql: t.Optional[str] = None, 989 ) -> t.List[t.Optional[exp.Expression]]: 990 """ 991 Parses a list of tokens into a given Expression type. If a collection of Expression 992 types is given instead, this method will try to parse the token list into each one 993 of them, stopping at the first for which the parsing succeeds. 994 995 Args: 996 expression_types: The expression type(s) to try and parse the token list into. 997 raw_tokens: The list of tokens. 998 sql: The original SQL string, used to produce helpful debug messages. 999 1000 Returns: 1001 The target Expression. 1002 """ 1003 errors = [] 1004 for expression_type in ensure_list(expression_types): 1005 parser = self.EXPRESSION_PARSERS.get(expression_type) 1006 if not parser: 1007 raise TypeError(f"No parser registered for {expression_type}") 1008 1009 try: 1010 return self._parse(parser, raw_tokens, sql) 1011 except ParseError as e: 1012 e.errors[0]["into_expression"] = expression_type 1013 errors.append(e) 1014 1015 raise ParseError( 1016 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1017 errors=merge_errors(errors), 1018 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1055 def check_errors(self) -> None: 1056 """Logs or raises any found errors, depending on the chosen error level setting.""" 1057 if self.error_level == ErrorLevel.WARN: 1058 for error in self.errors: 1059 logger.error(str(error)) 1060 elif self.error_level == ErrorLevel.RAISE and self.errors: 1061 raise ParseError( 1062 concat_messages(self.errors, self.max_errors), 1063 errors=merge_errors(self.errors), 1064 )
Logs or raises any found errors, depending on the chosen error level setting.
1066 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1067 """ 1068 Appends an error in the list of recorded errors or raises it, depending on the chosen 1069 error level setting. 1070 """ 1071 token = token or self._curr or self._prev or Token.string("") 1072 start = token.start 1073 end = token.end + 1 1074 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1075 highlight = self.sql[start:end] 1076 end_context = self.sql[end : end + self.error_message_context] 1077 1078 error = ParseError.new( 1079 f"{message}. Line {token.line}, Col: {token.col}.\n" 1080 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1081 description=message, 1082 line=token.line, 1083 col=token.col, 1084 start_context=start_context, 1085 highlight=highlight, 1086 end_context=end_context, 1087 ) 1088 1089 if self.error_level == ErrorLevel.IMMEDIATE: 1090 raise error 1091 1092 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1094 def expression( 1095 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1096 ) -> E: 1097 """ 1098 Creates a new, validated Expression. 1099 1100 Args: 1101 exp_class: The expression class to instantiate. 1102 comments: An optional list of comments to attach to the expression. 1103 kwargs: The arguments to set for the expression along with their respective values. 1104 1105 Returns: 1106 The target expression. 1107 """ 1108 instance = exp_class(**kwargs) 1109 instance.add_comments(comments) if comments else self._add_comments(instance) 1110 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1117 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1118 """ 1119 Validates an Expression, making sure that all its mandatory arguments are set. 1120 1121 Args: 1122 expression: The expression to validate. 1123 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1124 1125 Returns: 1126 The validated expression. 1127 """ 1128 if self.error_level != ErrorLevel.IGNORE: 1129 for error_message in expression.error_messages(args): 1130 self.raise_error(error_message) 1131 1132 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.