# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.BIGDECIMAL, 182 TokenType.UUID, 183 TokenType.GEOGRAPHY, 184 TokenType.GEOMETRY, 185 TokenType.HLLSKETCH, 186 TokenType.HSTORE, 187 TokenType.PSEUDO_TYPE, 188 TokenType.SUPER, 189 TokenType.SERIAL, 190 TokenType.SMALLSERIAL, 191 TokenType.BIGSERIAL, 192 TokenType.XML, 193 TokenType.YEAR, 194 TokenType.UNIQUEIDENTIFIER, 195 TokenType.USERDEFINED, 196 TokenType.MONEY, 197 TokenType.SMALLMONEY, 198 TokenType.ROWVERSION, 199 TokenType.IMAGE, 200 TokenType.VARIANT, 201 TokenType.OBJECT, 202 TokenType.OBJECT_IDENTIFIER, 203 TokenType.INET, 204 TokenType.IPADDRESS, 205 TokenType.IPPREFIX, 206 TokenType.UNKNOWN, 207 TokenType.NULL, 208 *ENUM_TYPE_TOKENS, 209 *NESTED_TYPE_TOKENS, 210 } 211 212 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 213 TokenType.BIGINT: TokenType.UBIGINT, 214 TokenType.INT: TokenType.UINT, 215 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 216 TokenType.SMALLINT: 
TokenType.USMALLINT, 217 TokenType.TINYINT: TokenType.UTINYINT, 218 } 219 220 SUBQUERY_PREDICATES = { 221 TokenType.ANY: exp.Any, 222 TokenType.ALL: exp.All, 223 TokenType.EXISTS: exp.Exists, 224 TokenType.SOME: exp.Any, 225 } 226 227 RESERVED_KEYWORDS = { 228 *Tokenizer.SINGLE_TOKENS.values(), 229 TokenType.SELECT, 230 } 231 232 DB_CREATABLES = { 233 TokenType.DATABASE, 234 TokenType.SCHEMA, 235 TokenType.TABLE, 236 TokenType.VIEW, 237 TokenType.DICTIONARY, 238 } 239 240 CREATABLES = { 241 TokenType.COLUMN, 242 TokenType.FUNCTION, 243 TokenType.INDEX, 244 TokenType.PROCEDURE, 245 *DB_CREATABLES, 246 } 247 248 # Tokens that can represent identifiers 249 ID_VAR_TOKENS = { 250 TokenType.VAR, 251 TokenType.ANTI, 252 TokenType.APPLY, 253 TokenType.ASC, 254 TokenType.AUTO_INCREMENT, 255 TokenType.BEGIN, 256 TokenType.CACHE, 257 TokenType.CASE, 258 TokenType.COLLATE, 259 TokenType.COMMAND, 260 TokenType.COMMENT, 261 TokenType.COMMIT, 262 TokenType.CONSTRAINT, 263 TokenType.DEFAULT, 264 TokenType.DELETE, 265 TokenType.DESC, 266 TokenType.DESCRIBE, 267 TokenType.DICTIONARY, 268 TokenType.DIV, 269 TokenType.END, 270 TokenType.EXECUTE, 271 TokenType.ESCAPE, 272 TokenType.FALSE, 273 TokenType.FIRST, 274 TokenType.FILTER, 275 TokenType.FORMAT, 276 TokenType.FULL, 277 TokenType.IS, 278 TokenType.ISNULL, 279 TokenType.INTERVAL, 280 TokenType.KEEP, 281 TokenType.LEFT, 282 TokenType.LOAD, 283 TokenType.MERGE, 284 TokenType.NATURAL, 285 TokenType.NEXT, 286 TokenType.OFFSET, 287 TokenType.ORDINALITY, 288 TokenType.OVERWRITE, 289 TokenType.PARTITION, 290 TokenType.PERCENT, 291 TokenType.PIVOT, 292 TokenType.PRAGMA, 293 TokenType.RANGE, 294 TokenType.REFERENCES, 295 TokenType.RIGHT, 296 TokenType.ROW, 297 TokenType.ROWS, 298 TokenType.SEMI, 299 TokenType.SET, 300 TokenType.SETTINGS, 301 TokenType.SHOW, 302 TokenType.TEMPORARY, 303 TokenType.TOP, 304 TokenType.TRUE, 305 TokenType.UNIQUE, 306 TokenType.UNPIVOT, 307 TokenType.UPDATE, 308 TokenType.VOLATILE, 309 TokenType.WINDOW, 310 
*CREATABLES, 311 *SUBQUERY_PREDICATES, 312 *TYPE_TOKENS, 313 *NO_PAREN_FUNCTIONS, 314 } 315 316 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 317 318 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 319 TokenType.APPLY, 320 TokenType.ASOF, 321 TokenType.FULL, 322 TokenType.LEFT, 323 TokenType.LOCK, 324 TokenType.NATURAL, 325 TokenType.OFFSET, 326 TokenType.RIGHT, 327 TokenType.WINDOW, 328 } 329 330 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 331 332 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 333 334 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 335 336 FUNC_TOKENS = { 337 TokenType.COMMAND, 338 TokenType.CURRENT_DATE, 339 TokenType.CURRENT_DATETIME, 340 TokenType.CURRENT_TIMESTAMP, 341 TokenType.CURRENT_TIME, 342 TokenType.CURRENT_USER, 343 TokenType.FILTER, 344 TokenType.FIRST, 345 TokenType.FORMAT, 346 TokenType.GLOB, 347 TokenType.IDENTIFIER, 348 TokenType.INDEX, 349 TokenType.ISNULL, 350 TokenType.ILIKE, 351 TokenType.INSERT, 352 TokenType.LIKE, 353 TokenType.MERGE, 354 TokenType.OFFSET, 355 TokenType.PRIMARY_KEY, 356 TokenType.RANGE, 357 TokenType.REPLACE, 358 TokenType.RLIKE, 359 TokenType.ROW, 360 TokenType.UNNEST, 361 TokenType.VAR, 362 TokenType.LEFT, 363 TokenType.RIGHT, 364 TokenType.DATE, 365 TokenType.DATETIME, 366 TokenType.TABLE, 367 TokenType.TIMESTAMP, 368 TokenType.TIMESTAMPTZ, 369 TokenType.WINDOW, 370 TokenType.XOR, 371 *TYPE_TOKENS, 372 *SUBQUERY_PREDICATES, 373 } 374 375 CONJUNCTION = { 376 TokenType.AND: exp.And, 377 TokenType.OR: exp.Or, 378 } 379 380 EQUALITY = { 381 TokenType.EQ: exp.EQ, 382 TokenType.NEQ: exp.NEQ, 383 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 384 } 385 386 COMPARISON = { 387 TokenType.GT: exp.GT, 388 TokenType.GTE: exp.GTE, 389 TokenType.LT: exp.LT, 390 TokenType.LTE: exp.LTE, 391 } 392 393 BITWISE = { 394 TokenType.AMP: exp.BitwiseAnd, 395 TokenType.CARET: exp.BitwiseXor, 396 TokenType.PIPE: exp.BitwiseOr, 397 TokenType.DPIPE: exp.DPipe, 398 } 399 400 TERM = { 401 TokenType.DASH: exp.Sub, 402 
TokenType.PLUS: exp.Add, 403 TokenType.MOD: exp.Mod, 404 TokenType.COLLATE: exp.Collate, 405 } 406 407 FACTOR = { 408 TokenType.DIV: exp.IntDiv, 409 TokenType.LR_ARROW: exp.Distance, 410 TokenType.SLASH: exp.Div, 411 TokenType.STAR: exp.Mul, 412 } 413 414 TIMES = { 415 TokenType.TIME, 416 TokenType.TIMETZ, 417 } 418 419 TIMESTAMPS = { 420 TokenType.TIMESTAMP, 421 TokenType.TIMESTAMPTZ, 422 TokenType.TIMESTAMPLTZ, 423 *TIMES, 424 } 425 426 SET_OPERATIONS = { 427 TokenType.UNION, 428 TokenType.INTERSECT, 429 TokenType.EXCEPT, 430 } 431 432 JOIN_METHODS = { 433 TokenType.NATURAL, 434 TokenType.ASOF, 435 } 436 437 JOIN_SIDES = { 438 TokenType.LEFT, 439 TokenType.RIGHT, 440 TokenType.FULL, 441 } 442 443 JOIN_KINDS = { 444 TokenType.INNER, 445 TokenType.OUTER, 446 TokenType.CROSS, 447 TokenType.SEMI, 448 TokenType.ANTI, 449 } 450 451 JOIN_HINTS: t.Set[str] = set() 452 453 LAMBDAS = { 454 TokenType.ARROW: lambda self, expressions: self.expression( 455 exp.Lambda, 456 this=self._replace_lambda( 457 self._parse_conjunction(), 458 {node.name for node in expressions}, 459 ), 460 expressions=expressions, 461 ), 462 TokenType.FARROW: lambda self, expressions: self.expression( 463 exp.Kwarg, 464 this=exp.var(expressions[0].name), 465 expression=self._parse_conjunction(), 466 ), 467 } 468 469 COLUMN_OPERATORS = { 470 TokenType.DOT: None, 471 TokenType.DCOLON: lambda self, this, to: self.expression( 472 exp.Cast if self.STRICT_CAST else exp.TryCast, 473 this=this, 474 to=to, 475 ), 476 TokenType.ARROW: lambda self, this, path: self.expression( 477 exp.JSONExtract, 478 this=this, 479 expression=path, 480 ), 481 TokenType.DARROW: lambda self, this, path: self.expression( 482 exp.JSONExtractScalar, 483 this=this, 484 expression=path, 485 ), 486 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 487 exp.JSONBExtract, 488 this=this, 489 expression=path, 490 ), 491 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 492 exp.JSONBExtractScalar, 493 this=this, 
494 expression=path, 495 ), 496 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 497 exp.JSONBContains, 498 this=this, 499 expression=key, 500 ), 501 } 502 503 EXPRESSION_PARSERS = { 504 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 505 exp.Column: lambda self: self._parse_column(), 506 exp.Condition: lambda self: self._parse_conjunction(), 507 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 508 exp.Expression: lambda self: self._parse_statement(), 509 exp.From: lambda self: self._parse_from(), 510 exp.Group: lambda self: self._parse_group(), 511 exp.Having: lambda self: self._parse_having(), 512 exp.Identifier: lambda self: self._parse_id_var(), 513 exp.Join: lambda self: self._parse_join(), 514 exp.Lambda: lambda self: self._parse_lambda(), 515 exp.Lateral: lambda self: self._parse_lateral(), 516 exp.Limit: lambda self: self._parse_limit(), 517 exp.Offset: lambda self: self._parse_offset(), 518 exp.Order: lambda self: self._parse_order(), 519 exp.Ordered: lambda self: self._parse_ordered(), 520 exp.Properties: lambda self: self._parse_properties(), 521 exp.Qualify: lambda self: self._parse_qualify(), 522 exp.Returning: lambda self: self._parse_returning(), 523 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 524 exp.Table: lambda self: self._parse_table_parts(), 525 exp.TableAlias: lambda self: self._parse_table_alias(), 526 exp.Where: lambda self: self._parse_where(), 527 exp.Window: lambda self: self._parse_named_window(), 528 exp.With: lambda self: self._parse_with(), 529 "JOIN_TYPE": lambda self: self._parse_join_parts(), 530 } 531 532 STATEMENT_PARSERS = { 533 TokenType.ALTER: lambda self: self._parse_alter(), 534 TokenType.BEGIN: lambda self: self._parse_transaction(), 535 TokenType.CACHE: lambda self: self._parse_cache(), 536 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 537 TokenType.COMMENT: lambda self: self._parse_comment(), 538 
TokenType.CREATE: lambda self: self._parse_create(), 539 TokenType.DELETE: lambda self: self._parse_delete(), 540 TokenType.DESC: lambda self: self._parse_describe(), 541 TokenType.DESCRIBE: lambda self: self._parse_describe(), 542 TokenType.DROP: lambda self: self._parse_drop(), 543 TokenType.INSERT: lambda self: self._parse_insert(), 544 TokenType.LOAD: lambda self: self._parse_load(), 545 TokenType.MERGE: lambda self: self._parse_merge(), 546 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 547 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 548 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 549 TokenType.SET: lambda self: self._parse_set(), 550 TokenType.UNCACHE: lambda self: self._parse_uncache(), 551 TokenType.UPDATE: lambda self: self._parse_update(), 552 TokenType.USE: lambda self: self.expression( 553 exp.Use, 554 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 555 and exp.var(self._prev.text), 556 this=self._parse_table(schema=False), 557 ), 558 } 559 560 UNARY_PARSERS = { 561 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 562 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 563 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 564 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 565 } 566 567 PRIMARY_PARSERS = { 568 TokenType.STRING: lambda self, token: self.expression( 569 exp.Literal, this=token.text, is_string=True 570 ), 571 TokenType.NUMBER: lambda self, token: self.expression( 572 exp.Literal, this=token.text, is_string=False 573 ), 574 TokenType.STAR: lambda self, _: self.expression( 575 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 576 ), 577 TokenType.NULL: lambda self, _: self.expression(exp.Null), 578 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 579 
TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 580 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 581 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 582 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 583 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 584 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 585 exp.National, this=token.text 586 ), 587 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 588 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 589 } 590 591 PLACEHOLDER_PARSERS = { 592 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 593 TokenType.PARAMETER: lambda self: self._parse_parameter(), 594 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 595 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 596 else None, 597 } 598 599 RANGE_PARSERS = { 600 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 601 TokenType.GLOB: binary_range_parser(exp.Glob), 602 TokenType.ILIKE: binary_range_parser(exp.ILike), 603 TokenType.IN: lambda self, this: self._parse_in(this), 604 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 605 TokenType.IS: lambda self, this: self._parse_is(this), 606 TokenType.LIKE: binary_range_parser(exp.Like), 607 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 608 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 609 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 610 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 611 } 612 613 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 614 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 615 "AUTO_INCREMENT": lambda self: 
self._parse_property_assignment(exp.AutoIncrementProperty), 616 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 617 "CHARACTER SET": lambda self: self._parse_character_set(), 618 "CHECKSUM": lambda self: self._parse_checksum(), 619 "CLUSTER BY": lambda self: self._parse_cluster(), 620 "CLUSTERED": lambda self: self._parse_clustered_by(), 621 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 622 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 623 "COPY": lambda self: self._parse_copy_property(), 624 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 625 "DEFINER": lambda self: self._parse_definer(), 626 "DETERMINISTIC": lambda self: self.expression( 627 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 628 ), 629 "DISTKEY": lambda self: self._parse_distkey(), 630 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 631 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 632 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 633 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 634 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 635 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 636 "FREESPACE": lambda self: self._parse_freespace(), 637 "HEAP": lambda self: self.expression(exp.HeapProperty), 638 "IMMUTABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 642 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 643 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 644 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 645 "LIKE": lambda self: self._parse_create_like(), 646 "LOCATION": lambda self: 
self._parse_property_assignment(exp.LocationProperty), 647 "LOCK": lambda self: self._parse_locking(), 648 "LOCKING": lambda self: self._parse_locking(), 649 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 650 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 651 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 652 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 653 "NO": lambda self: self._parse_no_property(), 654 "ON": lambda self: self._parse_on_property(), 655 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 656 "PARTITION BY": lambda self: self._parse_partitioned_by(), 657 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 658 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 659 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 660 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 661 "RETURNS": lambda self: self._parse_returns(), 662 "ROW": lambda self: self._parse_row(), 663 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 664 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 665 "SETTINGS": lambda self: self.expression( 666 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 667 ), 668 "SORTKEY": lambda self: self._parse_sortkey(), 669 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 670 "STABLE": lambda self: self.expression( 671 exp.StabilityProperty, this=exp.Literal.string("STABLE") 672 ), 673 "STORED": lambda self: self._parse_stored(), 674 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 675 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 676 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 677 "TO": lambda self: self._parse_to_table(), 678 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 679 "TTL": lambda self: self._parse_ttl(), 680 
"USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 681 "VOLATILE": lambda self: self._parse_volatile_property(), 682 "WITH": lambda self: self._parse_with_property(), 683 } 684 685 CONSTRAINT_PARSERS = { 686 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 687 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 688 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 689 "CHARACTER SET": lambda self: self.expression( 690 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "CHECK": lambda self: self.expression( 693 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 694 ), 695 "COLLATE": lambda self: self.expression( 696 exp.CollateColumnConstraint, this=self._parse_var() 697 ), 698 "COMMENT": lambda self: self.expression( 699 exp.CommentColumnConstraint, this=self._parse_string() 700 ), 701 "COMPRESS": lambda self: self._parse_compress(), 702 "CLUSTERED": lambda self: self.expression( 703 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 704 ), 705 "NONCLUSTERED": lambda self: self.expression( 706 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 707 ), 708 "DEFAULT": lambda self: self.expression( 709 exp.DefaultColumnConstraint, this=self._parse_bitwise() 710 ), 711 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 712 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 713 "FORMAT": lambda self: self.expression( 714 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 715 ), 716 "GENERATED": lambda self: self._parse_generated_as_identity(), 717 "IDENTITY": lambda self: self._parse_auto_increment(), 718 "INLINE": lambda self: self._parse_inline(), 719 "LIKE": lambda self: self._parse_create_like(), 720 "NOT": lambda self: self._parse_not_constraint(), 721 "NULL": lambda self: 
self.expression(exp.NotNullColumnConstraint, allow_null=True), 722 "ON": lambda self: ( 723 self._match(TokenType.UPDATE) 724 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 725 ) 726 or self.expression(exp.OnProperty, this=self._parse_id_var()), 727 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 728 "PRIMARY KEY": lambda self: self._parse_primary_key(), 729 "REFERENCES": lambda self: self._parse_references(match=False), 730 "TITLE": lambda self: self.expression( 731 exp.TitleColumnConstraint, this=self._parse_var_or_string() 732 ), 733 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 734 "UNIQUE": lambda self: self._parse_unique(), 735 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 736 "WITH": lambda self: self.expression( 737 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 738 ), 739 } 740 741 ALTER_PARSERS = { 742 "ADD": lambda self: self._parse_alter_table_add(), 743 "ALTER": lambda self: self._parse_alter_table_alter(), 744 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 745 "DROP": lambda self: self._parse_alter_table_drop(), 746 "RENAME": lambda self: self._parse_alter_table_rename(), 747 } 748 749 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 750 751 NO_PAREN_FUNCTION_PARSERS = { 752 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 753 "CASE": lambda self: self._parse_case(), 754 "IF": lambda self: self._parse_if(), 755 "NEXT": lambda self: self._parse_next_value_for(), 756 } 757 758 INVALID_FUNC_NAME_TOKENS = { 759 TokenType.IDENTIFIER, 760 TokenType.STRING, 761 } 762 763 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 764 765 FUNCTION_PARSERS = { 766 "ANY_VALUE": lambda self: self._parse_any_value(), 767 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 768 "CONCAT": lambda self: 
self._parse_concat(), 769 "CONCAT_WS": lambda self: self._parse_concat_ws(), 770 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 771 "DECODE": lambda self: self._parse_decode(), 772 "EXTRACT": lambda self: self._parse_extract(), 773 "JSON_OBJECT": lambda self: self._parse_json_object(), 774 "LOG": lambda self: self._parse_logarithm(), 775 "MATCH": lambda self: self._parse_match_against(), 776 "OPENJSON": lambda self: self._parse_open_json(), 777 "POSITION": lambda self: self._parse_position(), 778 "SAFE_CAST": lambda self: self._parse_cast(False), 779 "STRING_AGG": lambda self: self._parse_string_agg(), 780 "SUBSTRING": lambda self: self._parse_substring(), 781 "TRIM": lambda self: self._parse_trim(), 782 "TRY_CAST": lambda self: self._parse_cast(False), 783 "TRY_CONVERT": lambda self: self._parse_convert(False), 784 } 785 786 QUERY_MODIFIER_PARSERS = { 787 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 788 TokenType.WHERE: lambda self: ("where", self._parse_where()), 789 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 790 TokenType.HAVING: lambda self: ("having", self._parse_having()), 791 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 792 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 793 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 794 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 795 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 796 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 797 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 798 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 799 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 800 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 801 TokenType.CLUSTER_BY: lambda self: ( 802 "cluster", 803 self._parse_sort(exp.Cluster, 
TokenType.CLUSTER_BY), 804 ), 805 TokenType.DISTRIBUTE_BY: lambda self: ( 806 "distribute", 807 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 808 ), 809 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 810 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 811 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 812 } 813 814 SET_PARSERS = { 815 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 816 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 817 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 818 "TRANSACTION": lambda self: self._parse_set_transaction(), 819 } 820 821 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 822 823 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 824 825 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 826 827 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 828 829 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 830 831 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 832 TRANSACTION_CHARACTERISTICS = { 833 "ISOLATION LEVEL REPEATABLE READ", 834 "ISOLATION LEVEL READ COMMITTED", 835 "ISOLATION LEVEL READ UNCOMMITTED", 836 "ISOLATION LEVEL SERIALIZABLE", 837 "READ WRITE", 838 "READ ONLY", 839 } 840 841 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 842 843 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 844 845 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 846 847 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 848 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 849 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 850 851 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 852 853 DISTINCT_TOKENS = {TokenType.DISTINCT} 854 855 STRICT_CAST = True 856 857 # A NULL arg in CONCAT yields NULL by 
default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state, so the instance can be reused on another token list."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the token list couldn't be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to before trying the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into one chunk per semicolon-terminated statement, then
        # runs `parse_method` over each chunk, producing one tree (or None) per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left unconsumed mean the statement wasn't fully parsed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The ANSI escape codes underline the offending span in terminal output
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicit comments if given, otherwise consume any pending token comments
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the comments buffered from the previous token onto `expression`
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL text spanned by the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward, keeping _curr/_next/_prev/_prev_comments in sync
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to `index` (implemented as a negative _advance)
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token plus the rest of the statement as an opaque command
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: preserve the statement as an opaque command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by one action keyword
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch table first, then commands,
        # then a bare expression or SELECT-like query.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <table> [CASCADE|...]
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; NOT is only expected when not_=True
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION becomes the creatable
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone,
                    this=clone,
                    when=when,
                    kind=clone_kind,
                    shallow=shallow,
                    expression=clone_expression,
                )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
1361 1362 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1363 # only used for teradata currently 1364 self._match(TokenType.COMMA) 1365 1366 kwargs = { 1367 "no": self._match_text_seq("NO"), 1368 "dual": self._match_text_seq("DUAL"), 1369 "before": self._match_text_seq("BEFORE"), 1370 "default": self._match_text_seq("DEFAULT"), 1371 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1372 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1373 "after": self._match_text_seq("AFTER"), 1374 "minimum": self._match_texts(("MIN", "MINIMUM")), 1375 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1376 } 1377 1378 if self._match_texts(self.PROPERTY_PARSERS): 1379 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1380 try: 1381 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1382 except TypeError: 1383 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1384 1385 return None 1386 1387 def _parse_property(self) -> t.Optional[exp.Expression]: 1388 if self._match_texts(self.PROPERTY_PARSERS): 1389 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1390 1391 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1392 return self._parse_character_set(default=True) 1393 1394 if self._match_text_seq("COMPOUND", "SORTKEY"): 1395 return self._parse_sortkey(compound=True) 1396 1397 if self._match_text_seq("SQL", "SECURITY"): 1398 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1399 1400 assignment = self._match_pair( 1401 TokenType.VAR, TokenType.EQ, advance=False 1402 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1403 1404 if assignment: 1405 key = self._parse_var_or_string() 1406 self._match(TokenType.EQ) 1407 return self.expression( 1408 exp.Property, 1409 this=key, 1410 value=self._parse_column() or self._parse_var(any_token=True), 1411 ) 1412 1413 return None 1414 1415 def _parse_stored(self) -> exp.FileFormatProperty: 1416 
self._match(TokenType.ALIAS) 1417 1418 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1419 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1420 1421 return self.expression( 1422 exp.FileFormatProperty, 1423 this=self.expression( 1424 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1425 ) 1426 if input_format or output_format 1427 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1428 ) 1429 1430 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1431 self._match(TokenType.EQ) 1432 self._match(TokenType.ALIAS) 1433 return self.expression(exp_class, this=self._parse_field()) 1434 1435 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1436 properties = [] 1437 while True: 1438 if before: 1439 prop = self._parse_property_before() 1440 else: 1441 prop = self._parse_property() 1442 1443 if not prop: 1444 break 1445 for p in ensure_list(prop): 1446 properties.append(p) 1447 1448 if properties: 1449 return self.expression(exp.Properties, expressions=properties) 1450 1451 return None 1452 1453 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1454 return self.expression( 1455 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1456 ) 1457 1458 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1459 if self._index >= 2: 1460 pre_volatile_token = self._tokens[self._index - 2] 1461 else: 1462 pre_volatile_token = None 1463 1464 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1465 return exp.VolatileProperty() 1466 1467 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1468 1469 def _parse_with_property( 1470 self, 1471 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1472 if self._match(TokenType.L_PAREN, advance=False): 1473 return 
self._parse_wrapped_csv(self._parse_property) 1474 1475 if self._match_text_seq("JOURNAL"): 1476 return self._parse_withjournaltable() 1477 1478 if self._match_text_seq("DATA"): 1479 return self._parse_withdata(no=False) 1480 elif self._match_text_seq("NO", "DATA"): 1481 return self._parse_withdata(no=True) 1482 1483 if not self._next: 1484 return None 1485 1486 return self._parse_withisolatedloading() 1487 1488 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1489 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1490 self._match(TokenType.EQ) 1491 1492 user = self._parse_id_var() 1493 self._match(TokenType.PARAMETER) 1494 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1495 1496 if not user or not host: 1497 return None 1498 1499 return exp.DefinerProperty(this=f"{user}@{host}") 1500 1501 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1502 self._match(TokenType.TABLE) 1503 self._match(TokenType.EQ) 1504 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1505 1506 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1507 return self.expression(exp.LogProperty, no=no) 1508 1509 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1510 return self.expression(exp.JournalProperty, **kwargs) 1511 1512 def _parse_checksum(self) -> exp.ChecksumProperty: 1513 self._match(TokenType.EQ) 1514 1515 on = None 1516 if self._match(TokenType.ON): 1517 on = True 1518 elif self._match_text_seq("OFF"): 1519 on = False 1520 1521 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1522 1523 def _parse_cluster(self) -> exp.Cluster: 1524 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1525 1526 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1527 self._match_text_seq("BY") 1528 1529 self._match_l_paren() 1530 expressions = self._parse_csv(self._parse_column) 1531 
self._match_r_paren() 1532 1533 if self._match_text_seq("SORTED", "BY"): 1534 self._match_l_paren() 1535 sorted_by = self._parse_csv(self._parse_ordered) 1536 self._match_r_paren() 1537 else: 1538 sorted_by = None 1539 1540 self._match(TokenType.INTO) 1541 buckets = self._parse_number() 1542 self._match_text_seq("BUCKETS") 1543 1544 return self.expression( 1545 exp.ClusteredByProperty, 1546 expressions=expressions, 1547 sorted_by=sorted_by, 1548 buckets=buckets, 1549 ) 1550 1551 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1552 if not self._match_text_seq("GRANTS"): 1553 self._retreat(self._index - 1) 1554 return None 1555 1556 return self.expression(exp.CopyGrantsProperty) 1557 1558 def _parse_freespace(self) -> exp.FreespaceProperty: 1559 self._match(TokenType.EQ) 1560 return self.expression( 1561 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1562 ) 1563 1564 def _parse_mergeblockratio( 1565 self, no: bool = False, default: bool = False 1566 ) -> exp.MergeBlockRatioProperty: 1567 if self._match(TokenType.EQ): 1568 return self.expression( 1569 exp.MergeBlockRatioProperty, 1570 this=self._parse_number(), 1571 percent=self._match(TokenType.PERCENT), 1572 ) 1573 1574 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1575 1576 def _parse_datablocksize( 1577 self, 1578 default: t.Optional[bool] = None, 1579 minimum: t.Optional[bool] = None, 1580 maximum: t.Optional[bool] = None, 1581 ) -> exp.DataBlocksizeProperty: 1582 self._match(TokenType.EQ) 1583 size = self._parse_number() 1584 1585 units = None 1586 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1587 units = self._prev.text 1588 1589 return self.expression( 1590 exp.DataBlocksizeProperty, 1591 size=size, 1592 units=units, 1593 default=default, 1594 minimum=minimum, 1595 maximum=maximum, 1596 ) 1597 1598 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1599 self._match(TokenType.EQ) 1600 always 
= self._match_text_seq("ALWAYS") 1601 manual = self._match_text_seq("MANUAL") 1602 never = self._match_text_seq("NEVER") 1603 default = self._match_text_seq("DEFAULT") 1604 1605 autotemp = None 1606 if self._match_text_seq("AUTOTEMP"): 1607 autotemp = self._parse_schema() 1608 1609 return self.expression( 1610 exp.BlockCompressionProperty, 1611 always=always, 1612 manual=manual, 1613 never=never, 1614 default=default, 1615 autotemp=autotemp, 1616 ) 1617 1618 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1619 no = self._match_text_seq("NO") 1620 concurrent = self._match_text_seq("CONCURRENT") 1621 self._match_text_seq("ISOLATED", "LOADING") 1622 for_all = self._match_text_seq("FOR", "ALL") 1623 for_insert = self._match_text_seq("FOR", "INSERT") 1624 for_none = self._match_text_seq("FOR", "NONE") 1625 return self.expression( 1626 exp.IsolatedLoadingProperty, 1627 no=no, 1628 concurrent=concurrent, 1629 for_all=for_all, 1630 for_insert=for_insert, 1631 for_none=for_none, 1632 ) 1633 1634 def _parse_locking(self) -> exp.LockingProperty: 1635 if self._match(TokenType.TABLE): 1636 kind = "TABLE" 1637 elif self._match(TokenType.VIEW): 1638 kind = "VIEW" 1639 elif self._match(TokenType.ROW): 1640 kind = "ROW" 1641 elif self._match_text_seq("DATABASE"): 1642 kind = "DATABASE" 1643 else: 1644 kind = None 1645 1646 if kind in ("DATABASE", "TABLE", "VIEW"): 1647 this = self._parse_table_parts() 1648 else: 1649 this = None 1650 1651 if self._match(TokenType.FOR): 1652 for_or_in = "FOR" 1653 elif self._match(TokenType.IN): 1654 for_or_in = "IN" 1655 else: 1656 for_or_in = None 1657 1658 if self._match_text_seq("ACCESS"): 1659 lock_type = "ACCESS" 1660 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1661 lock_type = "EXCLUSIVE" 1662 elif self._match_text_seq("SHARE"): 1663 lock_type = "SHARE" 1664 elif self._match_text_seq("READ"): 1665 lock_type = "READ" 1666 elif self._match_text_seq("WRITE"): 1667 lock_type = "WRITE" 1668 elif 
self._match_text_seq("CHECKSUM"): 1669 lock_type = "CHECKSUM" 1670 else: 1671 lock_type = None 1672 1673 override = self._match_text_seq("OVERRIDE") 1674 1675 return self.expression( 1676 exp.LockingProperty, 1677 this=this, 1678 kind=kind, 1679 for_or_in=for_or_in, 1680 lock_type=lock_type, 1681 override=override, 1682 ) 1683 1684 def _parse_partition_by(self) -> t.List[exp.Expression]: 1685 if self._match(TokenType.PARTITION_BY): 1686 return self._parse_csv(self._parse_conjunction) 1687 return [] 1688 1689 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1690 self._match(TokenType.EQ) 1691 return self.expression( 1692 exp.PartitionedByProperty, 1693 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1694 ) 1695 1696 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1697 if self._match_text_seq("AND", "STATISTICS"): 1698 statistics = True 1699 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1700 statistics = False 1701 else: 1702 statistics = None 1703 1704 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1705 1706 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1707 if self._match_text_seq("PRIMARY", "INDEX"): 1708 return exp.NoPrimaryIndexProperty() 1709 return None 1710 1711 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1712 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1713 return exp.OnCommitProperty() 1714 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1715 return exp.OnCommitProperty(delete=True) 1716 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1717 1718 def _parse_distkey(self) -> exp.DistKeyProperty: 1719 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1720 1721 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1722 table = self._parse_table(schema=True) 1723 1724 options = [] 1725 while self._match_texts(("INCLUDING", 
"EXCLUDING")): 1726 this = self._prev.text.upper() 1727 1728 id_var = self._parse_id_var() 1729 if not id_var: 1730 return None 1731 1732 options.append( 1733 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1734 ) 1735 1736 return self.expression(exp.LikeProperty, this=table, expressions=options) 1737 1738 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1739 return self.expression( 1740 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1741 ) 1742 1743 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1744 self._match(TokenType.EQ) 1745 return self.expression( 1746 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1747 ) 1748 1749 def _parse_returns(self) -> exp.ReturnsProperty: 1750 value: t.Optional[exp.Expression] 1751 is_table = self._match(TokenType.TABLE) 1752 1753 if is_table: 1754 if self._match(TokenType.LT): 1755 value = self.expression( 1756 exp.Schema, 1757 this="TABLE", 1758 expressions=self._parse_csv(self._parse_struct_types), 1759 ) 1760 if not self._match(TokenType.GT): 1761 self.raise_error("Expecting >") 1762 else: 1763 value = self._parse_schema(exp.var("TABLE")) 1764 else: 1765 value = self._parse_types() 1766 1767 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1768 1769 def _parse_describe(self) -> exp.Describe: 1770 kind = self._match_set(self.CREATABLES) and self._prev.text 1771 this = self._parse_table(schema=True) 1772 properties = self._parse_properties() 1773 expressions = properties.expressions if properties else None 1774 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1775 1776 def _parse_insert(self) -> exp.Insert: 1777 comments = ensure_list(self._prev_comments) 1778 overwrite = self._match(TokenType.OVERWRITE) 1779 ignore = self._match(TokenType.IGNORE) 1780 local = self._match_text_seq("LOCAL") 1781 alternative = None 1782 1783 if 
self._match_text_seq("DIRECTORY"): 1784 this: t.Optional[exp.Expression] = self.expression( 1785 exp.Directory, 1786 this=self._parse_var_or_string(), 1787 local=local, 1788 row_format=self._parse_row_format(match_row=True), 1789 ) 1790 else: 1791 if self._match(TokenType.OR): 1792 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1793 1794 self._match(TokenType.INTO) 1795 comments += ensure_list(self._prev_comments) 1796 self._match(TokenType.TABLE) 1797 this = self._parse_table(schema=True) 1798 1799 returning = self._parse_returning() 1800 1801 return self.expression( 1802 exp.Insert, 1803 comments=comments, 1804 this=this, 1805 by_name=self._match_text_seq("BY", "NAME"), 1806 exists=self._parse_exists(), 1807 partition=self._parse_partition(), 1808 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1809 and self._parse_conjunction(), 1810 expression=self._parse_ddl_select(), 1811 conflict=self._parse_on_conflict(), 1812 returning=returning or self._parse_returning(), 1813 overwrite=overwrite, 1814 alternative=alternative, 1815 ignore=ignore, 1816 ) 1817 1818 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1819 conflict = self._match_text_seq("ON", "CONFLICT") 1820 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1821 1822 if not conflict and not duplicate: 1823 return None 1824 1825 nothing = None 1826 expressions = None 1827 key = None 1828 constraint = None 1829 1830 if conflict: 1831 if self._match_text_seq("ON", "CONSTRAINT"): 1832 constraint = self._parse_id_var() 1833 else: 1834 key = self._parse_csv(self._parse_value) 1835 1836 self._match_text_seq("DO") 1837 if self._match_text_seq("NOTHING"): 1838 nothing = True 1839 else: 1840 self._match(TokenType.UPDATE) 1841 self._match(TokenType.SET) 1842 expressions = self._parse_csv(self._parse_equality) 1843 1844 return self.expression( 1845 exp.OnConflict, 1846 duplicate=duplicate, 1847 expressions=expressions, 1848 nothing=nothing, 1849 key=key, 1850 
constraint=constraint, 1851 ) 1852 1853 def _parse_returning(self) -> t.Optional[exp.Returning]: 1854 if not self._match(TokenType.RETURNING): 1855 return None 1856 return self.expression( 1857 exp.Returning, 1858 expressions=self._parse_csv(self._parse_expression), 1859 into=self._match(TokenType.INTO) and self._parse_table_part(), 1860 ) 1861 1862 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1863 if not self._match(TokenType.FORMAT): 1864 return None 1865 return self._parse_row_format() 1866 1867 def _parse_row_format( 1868 self, match_row: bool = False 1869 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1870 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1871 return None 1872 1873 if self._match_text_seq("SERDE"): 1874 this = self._parse_string() 1875 1876 serde_properties = None 1877 if self._match(TokenType.SERDE_PROPERTIES): 1878 serde_properties = self.expression( 1879 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1880 ) 1881 1882 return self.expression( 1883 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1884 ) 1885 1886 self._match_text_seq("DELIMITED") 1887 1888 kwargs = {} 1889 1890 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1891 kwargs["fields"] = self._parse_string() 1892 if self._match_text_seq("ESCAPED", "BY"): 1893 kwargs["escaped"] = self._parse_string() 1894 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1895 kwargs["collection_items"] = self._parse_string() 1896 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1897 kwargs["map_keys"] = self._parse_string() 1898 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1899 kwargs["lines"] = self._parse_string() 1900 if self._match_text_seq("NULL", "DEFINED", "AS"): 1901 kwargs["null"] = self._parse_string() 1902 1903 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: 
ignore 1904 1905 def _parse_load(self) -> exp.LoadData | exp.Command: 1906 if self._match_text_seq("DATA"): 1907 local = self._match_text_seq("LOCAL") 1908 self._match_text_seq("INPATH") 1909 inpath = self._parse_string() 1910 overwrite = self._match(TokenType.OVERWRITE) 1911 self._match_pair(TokenType.INTO, TokenType.TABLE) 1912 1913 return self.expression( 1914 exp.LoadData, 1915 this=self._parse_table(schema=True), 1916 local=local, 1917 overwrite=overwrite, 1918 inpath=inpath, 1919 partition=self._parse_partition(), 1920 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1921 serde=self._match_text_seq("SERDE") and self._parse_string(), 1922 ) 1923 return self._parse_as_command(self._prev) 1924 1925 def _parse_delete(self) -> exp.Delete: 1926 # This handles MySQL's "Multiple-Table Syntax" 1927 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1928 tables = None 1929 comments = self._prev_comments 1930 if not self._match(TokenType.FROM, advance=False): 1931 tables = self._parse_csv(self._parse_table) or None 1932 1933 returning = self._parse_returning() 1934 1935 return self.expression( 1936 exp.Delete, 1937 comments=comments, 1938 tables=tables, 1939 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1940 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1941 where=self._parse_where(), 1942 returning=returning or self._parse_returning(), 1943 limit=self._parse_limit(), 1944 ) 1945 1946 def _parse_update(self) -> exp.Update: 1947 comments = self._prev_comments 1948 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 1949 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1950 returning = self._parse_returning() 1951 return self.expression( 1952 exp.Update, 1953 comments=comments, 1954 **{ # type: ignore 1955 "this": this, 1956 "expressions": expressions, 1957 "from": self._parse_from(joins=True), 1958 "where": self._parse_where(), 1959 
"returning": returning or self._parse_returning(), 1960 "order": self._parse_order(), 1961 "limit": self._parse_limit(), 1962 }, 1963 ) 1964 1965 def _parse_uncache(self) -> exp.Uncache: 1966 if not self._match(TokenType.TABLE): 1967 self.raise_error("Expecting TABLE after UNCACHE") 1968 1969 return self.expression( 1970 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1971 ) 1972 1973 def _parse_cache(self) -> exp.Cache: 1974 lazy = self._match_text_seq("LAZY") 1975 self._match(TokenType.TABLE) 1976 table = self._parse_table(schema=True) 1977 1978 options = [] 1979 if self._match_text_seq("OPTIONS"): 1980 self._match_l_paren() 1981 k = self._parse_string() 1982 self._match(TokenType.EQ) 1983 v = self._parse_string() 1984 options = [k, v] 1985 self._match_r_paren() 1986 1987 self._match(TokenType.ALIAS) 1988 return self.expression( 1989 exp.Cache, 1990 this=table, 1991 lazy=lazy, 1992 options=options, 1993 expression=self._parse_select(nested=True), 1994 ) 1995 1996 def _parse_partition(self) -> t.Optional[exp.Partition]: 1997 if not self._match(TokenType.PARTITION): 1998 return None 1999 2000 return self.expression( 2001 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2002 ) 2003 2004 def _parse_value(self) -> exp.Tuple: 2005 if self._match(TokenType.L_PAREN): 2006 expressions = self._parse_csv(self._parse_conjunction) 2007 self._match_r_paren() 2008 return self.expression(exp.Tuple, expressions=expressions) 2009 2010 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT / VALUES / parenthesized query, including a leading CTE.

        Args:
            nested: whether this select is nested inside parentheses.
            table: whether a bare table reference is acceptable here.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # raise_error may be a no-op at lenient error levels, so fall back
                # to returning the CTE itself.
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause, or return None if absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma separator.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: `alias [(cols)] AS (statement)`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
this=self._parse_wrapped(self._parse_statement), alias=alias 2143 ) 2144 2145 def _parse_table_alias( 2146 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2147 ) -> t.Optional[exp.TableAlias]: 2148 any_token = self._match(TokenType.ALIAS) 2149 alias = ( 2150 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2151 or self._parse_string_as_identifier() 2152 ) 2153 2154 index = self._index 2155 if self._match(TokenType.L_PAREN): 2156 columns = self._parse_csv(self._parse_function_parameter) 2157 self._match_r_paren() if columns else self._retreat(index) 2158 else: 2159 columns = None 2160 2161 if not alias and not columns: 2162 return None 2163 2164 return self.expression(exp.TableAlias, this=alias, columns=columns) 2165 2166 def _parse_subquery( 2167 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2168 ) -> t.Optional[exp.Subquery]: 2169 if not this: 2170 return None 2171 2172 return self.expression( 2173 exp.Subquery, 2174 this=this, 2175 pivots=self._parse_pivots(), 2176 alias=self._parse_table_alias() if parse_alias else None, 2177 ) 2178 2179 def _parse_query_modifiers( 2180 self, this: t.Optional[exp.Expression] 2181 ) -> t.Optional[exp.Expression]: 2182 if isinstance(this, self.MODIFIABLES): 2183 for join in iter(self._parse_join, None): 2184 this.append("joins", join) 2185 for lateral in iter(self._parse_lateral, None): 2186 this.append("laterals", lateral) 2187 2188 while True: 2189 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2190 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2191 key, expression = parser(self) 2192 2193 if expression: 2194 this.set(key, expression) 2195 if key == "limit": 2196 offset = expression.args.pop("offset", None) 2197 if offset: 2198 this.set("offset", exp.Offset(expression=offset)) 2199 continue 2200 break 2201 return this 2202 2203 def _parse_hint(self) -> t.Optional[exp.Hint]: 2204 if self._match(TokenType.HINT): 2205 hints = [] 
2206 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2207 hints.extend(hint) 2208 2209 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2210 self.raise_error("Expected */ after HINT") 2211 2212 return self.expression(exp.Hint, expressions=hints) 2213 2214 return None 2215 2216 def _parse_into(self) -> t.Optional[exp.Into]: 2217 if not self._match(TokenType.INTO): 2218 return None 2219 2220 temp = self._match(TokenType.TEMPORARY) 2221 unlogged = self._match_text_seq("UNLOGGED") 2222 self._match(TokenType.TABLE) 2223 2224 return self.expression( 2225 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2226 ) 2227 2228 def _parse_from( 2229 self, joins: bool = False, skip_from_token: bool = False 2230 ) -> t.Optional[exp.From]: 2231 if not skip_from_token and not self._match(TokenType.FROM): 2232 return None 2233 2234 return self.expression( 2235 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2236 ) 2237 2238 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2239 if not self._match(TokenType.MATCH_RECOGNIZE): 2240 return None 2241 2242 self._match_l_paren() 2243 2244 partition = self._parse_partition_by() 2245 order = self._parse_order() 2246 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2247 2248 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2249 rows = exp.var("ONE ROW PER MATCH") 2250 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2251 text = "ALL ROWS PER MATCH" 2252 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2253 text += f" SHOW EMPTY MATCHES" 2254 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2255 text += f" OMIT EMPTY MATCHES" 2256 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2257 text += f" WITH UNMATCHED ROWS" 2258 rows = exp.var(text) 2259 else: 2260 rows = None 2261 2262 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2263 text = "AFTER MATCH SKIP" 2264 if 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY, or return None."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL over an unnest, function call or identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each optional."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN (or comma join / APPLY), or return None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join prefix: rewind and discard what we matched.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins like `a JOIN b JOIN c ON ... ON ...`: parse the
            # inner joins first, then try the outer ON/USING; rewind if absent.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        Args:
            index: a pre-parsed index name; when given, parse the trailing
                `ON [TABLE] <table>` part instead of the leading keywords.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # Teradata

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL `WITH (...)` or MySQL index hints after a table, or None."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (identifier, string or
        placeholder; functions are allowed unless parsing a schema)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse `[catalog.][db.]table` (with arbitrarily deep extra dots)."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression: lateral, unnest, VALUES, subquery or a
        plain table reference with version/alias/hints/pivots/sample/joins."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect flag: some dialects put TABLESAMPLE before the alias, others after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR SYSTEM_TIME / VERSION),
        or return None."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, or return None."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In such dialects the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )
2652 ) 2653 2654 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2655 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2656 if not is_derived and not self._match(TokenType.VALUES): 2657 return None 2658 2659 expressions = self._parse_csv(self._parse_value) 2660 alias = self._parse_table_alias() 2661 2662 if is_derived: 2663 self._match_r_paren() 2664 2665 return self.expression( 2666 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2667 ) 2668 2669 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2670 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2671 as_modifier and self._match_text_seq("USING", "SAMPLE") 2672 ): 2673 return None 2674 2675 bucket_numerator = None 2676 bucket_denominator = None 2677 bucket_field = None 2678 percent = None 2679 rows = None 2680 size = None 2681 seed = None 2682 2683 kind = ( 2684 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2685 ) 2686 method = self._parse_var(tokens=(TokenType.ROW,)) 2687 2688 self._match(TokenType.L_PAREN) 2689 2690 if self.TABLESAMPLE_CSV: 2691 num = None 2692 expressions = self._parse_csv(self._parse_primary) 2693 else: 2694 expressions = None 2695 num = self._parse_number() 2696 2697 if self._match_text_seq("BUCKET"): 2698 bucket_numerator = self._parse_number() 2699 self._match_text_seq("OUT", "OF") 2700 bucket_denominator = bucket_denominator = self._parse_number() 2701 self._match(TokenType.ON) 2702 bucket_field = self._parse_field() 2703 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2704 percent = num 2705 elif self._match(TokenType.ROWS): 2706 rows = num 2707 elif num: 2708 size = num 2709 2710 self._match(TokenType.R_PAREN) 2711 2712 if self._match(TokenType.L_PAREN): 2713 method = self._parse_var() 2714 seed = self._match(TokenType.COMMA) and self._parse_number() 2715 self._match_r_paren() 2716 elif self._match_texts(("SEED", "REPEATABLE")): 
2717 seed = self._parse_wrapped(self._parse_number) 2718 2719 return self.expression( 2720 exp.TableSample, 2721 expressions=expressions, 2722 method=method, 2723 bucket_numerator=bucket_numerator, 2724 bucket_denominator=bucket_denominator, 2725 bucket_field=bucket_field, 2726 percent=percent, 2727 rows=rows, 2728 size=size, 2729 seed=seed, 2730 kind=kind, 2731 ) 2732 2733 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2734 return list(iter(self._parse_pivot, None)) or None 2735 2736 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2737 return list(iter(self._parse_join, None)) or None 2738 2739 # https://duckdb.org/docs/sql/statements/pivot 2740 def _parse_simplified_pivot(self) -> exp.Pivot: 2741 def _parse_on() -> t.Optional[exp.Expression]: 2742 this = self._parse_bitwise() 2743 return self._parse_in(this) if self._match(TokenType.IN) else this 2744 2745 this = self._parse_table() 2746 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2747 using = self._match(TokenType.USING) and self._parse_csv( 2748 lambda: self._parse_alias(self._parse_function()) 2749 ) 2750 group = self._parse_group() 2751 return self.expression( 2752 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2753 ) 2754 2755 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2756 index = self._index 2757 include_nulls = None 2758 2759 if self._match(TokenType.PIVOT): 2760 unpivot = False 2761 elif self._match(TokenType.UNPIVOT): 2762 unpivot = True 2763 2764 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2765 if self._match_text_seq("INCLUDE", "NULLS"): 2766 include_nulls = True 2767 elif self._match_text_seq("EXCLUDE", "NULLS"): 2768 include_nulls = False 2769 else: 2770 return None 2771 2772 expressions = [] 2773 field = None 2774 2775 if not self._match(TokenType.L_PAREN): 2776 self._retreat(index) 2777 return None 2778 2779 if unpivot: 2780 expressions = 
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the column names produced by PIVOT aggregations (dialect hook)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including GROUPING SETS / ROLLUP / CUBE / TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop: the grouping constructs may be interleaved and repeated.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; `ROLLUP (...)` stores the column list.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `GROUPING SETS (...)`, or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse a single grouping set: a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())
self._match(TokenType.QUALIFY): 2900 return None 2901 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2902 2903 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2904 if skip_start_token: 2905 start = None 2906 elif self._match(TokenType.START_WITH): 2907 start = self._parse_conjunction() 2908 else: 2909 return None 2910 2911 self._match(TokenType.CONNECT_BY) 2912 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2913 exp.Prior, this=self._parse_bitwise() 2914 ) 2915 connect = self._parse_conjunction() 2916 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2917 return self.expression(exp.Connect, start=start, connect=connect) 2918 2919 def _parse_order( 2920 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2921 ) -> t.Optional[exp.Expression]: 2922 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2923 return this 2924 2925 return self.expression( 2926 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2927 ) 2928 2929 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2930 if not self._match(token): 2931 return None 2932 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2933 2934 def _parse_ordered(self) -> exp.Ordered: 2935 this = self._parse_conjunction() 2936 self._match(TokenType.ASC) 2937 2938 is_desc = self._match(TokenType.DESC) 2939 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2940 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2941 desc = is_desc or False 2942 asc = not desc 2943 nulls_first = is_nulls_first or False 2944 explicitly_null_ordered = is_nulls_first or is_nulls_last 2945 2946 if ( 2947 not explicitly_null_ordered 2948 and ( 2949 (asc and self.NULL_ORDERING == "nulls_are_small") 2950 or (desc and self.NULL_ORDERING != "nulls_are_small") 2951 ) 2952 and self.NULL_ORDERING != "nulls_are_last" 2953 ): 2954 nulls_first = True 2955 2956 
return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2957 2958 def _parse_limit( 2959 self, this: t.Optional[exp.Expression] = None, top: bool = False 2960 ) -> t.Optional[exp.Expression]: 2961 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2962 comments = self._prev_comments 2963 if top: 2964 limit_paren = self._match(TokenType.L_PAREN) 2965 expression = self._parse_number() 2966 2967 if limit_paren: 2968 self._match_r_paren() 2969 else: 2970 expression = self._parse_term() 2971 2972 if self._match(TokenType.COMMA): 2973 offset = expression 2974 expression = self._parse_term() 2975 else: 2976 offset = None 2977 2978 limit_exp = self.expression( 2979 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2980 ) 2981 2982 return limit_exp 2983 2984 if self._match(TokenType.FETCH): 2985 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2986 direction = self._prev.text if direction else "FIRST" 2987 2988 count = self._parse_number() 2989 percent = self._match(TokenType.PERCENT) 2990 2991 self._match_set((TokenType.ROW, TokenType.ROWS)) 2992 2993 only = self._match_text_seq("ONLY") 2994 with_ties = self._match_text_seq("WITH", "TIES") 2995 2996 if only and with_ties: 2997 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2998 2999 return self.expression( 3000 exp.Fetch, 3001 direction=direction, 3002 count=count, 3003 percent=percent, 3004 with_ties=with_ties, 3005 ) 3006 3007 return this 3008 3009 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3010 if not self._match(TokenType.OFFSET): 3011 return this 3012 3013 count = self._parse_term() 3014 self._match_set((TokenType.ROW, TokenType.ROWS)) 3015 return self.expression(exp.Offset, this=this, expression=count) 3016 3017 def _parse_locks(self) -> t.List[exp.Lock]: 3018 locks = [] 3019 while True: 3020 if self._match_text_seq("FOR", "UPDATE"): 3021 update = True 3022 elif 
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Set operations default to DISTINCT unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-joined expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, <=, >, >=)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS [NOT] NULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS [NOT] DISTINCT FROM / NULL / bool);
        rewinds and returns None when what follows is not an IS-operand."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the operand of IN: an unnest, a (sub)query/tuple, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
self.expression(exp.In, this=this, expressions=expressions) 3134 3135 self._match_r_paren(this) 3136 else: 3137 this = self.expression(exp.In, this=this, field=self._parse_field()) 3138 3139 return this 3140 3141 def _parse_between(self, this: exp.Expression) -> exp.Between: 3142 low = self._parse_bitwise() 3143 self._match(TokenType.AND) 3144 high = self._parse_bitwise() 3145 return self.expression(exp.Between, this=this, low=low, high=high) 3146 3147 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3148 if not self._match(TokenType.ESCAPE): 3149 return this 3150 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3151 3152 def _parse_interval(self) -> t.Optional[exp.Interval]: 3153 index = self._index 3154 3155 if not self._match(TokenType.INTERVAL): 3156 return None 3157 3158 if self._match(TokenType.STRING, advance=False): 3159 this = self._parse_primary() 3160 else: 3161 this = self._parse_term() 3162 3163 if not this: 3164 self._retreat(index) 3165 return None 3166 3167 unit = self._parse_function() or self._parse_var(any_token=True) 3168 3169 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3170 # each INTERVAL expression into this canonical form so it's easy to transpile 3171 if this and this.is_number: 3172 this = exp.Literal.string(this.name) 3173 elif this and this.is_string: 3174 parts = this.name.split() 3175 3176 if len(parts) == 2: 3177 if unit: 3178 # This is not actually a unit, it's something else (e.g. 
a "window side") 3179 unit = None 3180 self._retreat(self._index - 1) 3181 3182 this = exp.Literal.string(parts[0]) 3183 unit = self.expression(exp.Var, this=parts[1]) 3184 3185 return self.expression(exp.Interval, this=this, unit=unit) 3186 3187 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3188 this = self._parse_term() 3189 3190 while True: 3191 if self._match_set(self.BITWISE): 3192 this = self.expression( 3193 self.BITWISE[self._prev.token_type], 3194 this=this, 3195 expression=self._parse_term(), 3196 ) 3197 elif self._match(TokenType.DQMARK): 3198 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3199 elif self._match_pair(TokenType.LT, TokenType.LT): 3200 this = self.expression( 3201 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3202 ) 3203 elif self._match_pair(TokenType.GT, TokenType.GT): 3204 this = self.expression( 3205 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3206 ) 3207 else: 3208 break 3209 3210 return this 3211 3212 def _parse_term(self) -> t.Optional[exp.Expression]: 3213 return self._parse_tokens(self._parse_factor, self.TERM) 3214 3215 def _parse_factor(self) -> t.Optional[exp.Expression]: 3216 return self._parse_tokens(self._parse_unary, self.FACTOR) 3217 3218 def _parse_unary(self) -> t.Optional[exp.Expression]: 3219 if self._match_set(self.UNARY_PARSERS): 3220 return self.UNARY_PARSERS[self._prev.token_type](self) 3221 return self._parse_at_time_zone(self._parse_type()) 3222 3223 def _parse_type(self) -> t.Optional[exp.Expression]: 3224 interval = self._parse_interval() 3225 if interval: 3226 return interval 3227 3228 index = self._index 3229 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3230 this = self._parse_column() 3231 3232 if data_type: 3233 if isinstance(this, exp.Literal): 3234 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3235 if parser: 3236 return parser(self, this, data_type) 3237 return self.expression(exp.Cast, 
this=this, to=data_type) 3238 if not data_type.expressions: 3239 self._retreat(index) 3240 return self._parse_column() 3241 return self._parse_column_ops(data_type) 3242 3243 return this 3244 3245 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3246 this = self._parse_type() 3247 if not this: 3248 return None 3249 3250 return self.expression( 3251 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3252 ) 3253 3254 def _parse_types( 3255 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3256 ) -> t.Optional[exp.Expression]: 3257 index = self._index 3258 3259 prefix = self._match_text_seq("SYSUDTLIB", ".") 3260 3261 if not self._match_set(self.TYPE_TOKENS): 3262 identifier = allow_identifiers and self._parse_id_var( 3263 any_token=False, tokens=(TokenType.VAR,) 3264 ) 3265 3266 if identifier: 3267 tokens = self._tokenizer.tokenize(identifier.name) 3268 3269 if len(tokens) != 1: 3270 self.raise_error("Unexpected identifier", self._prev) 3271 3272 if tokens[0].token_type in self.TYPE_TOKENS: 3273 self._prev = tokens[0] 3274 elif self.SUPPORTS_USER_DEFINED_TYPES: 3275 return identifier 3276 else: 3277 return None 3278 else: 3279 return None 3280 3281 type_token = self._prev.token_type 3282 3283 if type_token == TokenType.PSEUDO_TYPE: 3284 return self.expression(exp.PseudoType, this=self._prev.text) 3285 3286 if type_token == TokenType.OBJECT_IDENTIFIER: 3287 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3288 3289 nested = type_token in self.NESTED_TYPE_TOKENS 3290 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3291 expressions = None 3292 maybe_func = False 3293 3294 if self._match(TokenType.L_PAREN): 3295 if is_struct: 3296 expressions = self._parse_csv(self._parse_struct_types) 3297 elif nested: 3298 expressions = self._parse_csv( 3299 lambda: self._parse_types( 3300 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3301 ) 3302 ) 3303 elif type_token 
in self.ENUM_TYPE_TOKENS: 3304 expressions = self._parse_csv(self._parse_equality) 3305 else: 3306 expressions = self._parse_csv(self._parse_type_size) 3307 3308 if not expressions or not self._match(TokenType.R_PAREN): 3309 self._retreat(index) 3310 return None 3311 3312 maybe_func = True 3313 3314 this: t.Optional[exp.Expression] = None 3315 values: t.Optional[t.List[exp.Expression]] = None 3316 3317 if nested and self._match(TokenType.LT): 3318 if is_struct: 3319 expressions = self._parse_csv(self._parse_struct_types) 3320 else: 3321 expressions = self._parse_csv( 3322 lambda: self._parse_types( 3323 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3324 ) 3325 ) 3326 3327 if not self._match(TokenType.GT): 3328 self.raise_error("Expecting >") 3329 3330 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3331 values = self._parse_csv(self._parse_conjunction) 3332 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3333 3334 if type_token in self.TIMESTAMPS: 3335 if self._match_text_seq("WITH", "TIME", "ZONE"): 3336 maybe_func = False 3337 tz_type = ( 3338 exp.DataType.Type.TIMETZ 3339 if type_token in self.TIMES 3340 else exp.DataType.Type.TIMESTAMPTZ 3341 ) 3342 this = exp.DataType(this=tz_type, expressions=expressions) 3343 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3344 maybe_func = False 3345 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3346 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3347 maybe_func = False 3348 elif type_token == TokenType.INTERVAL: 3349 unit = self._parse_var() 3350 3351 if self._match_text_seq("TO"): 3352 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3353 else: 3354 span = None 3355 3356 if span or not unit: 3357 this = self.expression( 3358 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3359 ) 3360 else: 3361 this = self.expression(exp.Interval, unit=unit) 3362 3363 if maybe_func and check_func: 
3364 index2 = self._index 3365 peek = self._parse_string() 3366 3367 if not peek: 3368 self._retreat(index) 3369 return None 3370 3371 self._retreat(index2) 3372 3373 if not this: 3374 if self._match_text_seq("UNSIGNED"): 3375 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3376 if not unsigned_type_token: 3377 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3378 3379 type_token = unsigned_type_token or type_token 3380 3381 this = exp.DataType( 3382 this=exp.DataType.Type[type_token.value], 3383 expressions=expressions, 3384 nested=nested, 3385 values=values, 3386 prefix=prefix, 3387 ) 3388 3389 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3390 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3391 3392 return this 3393 3394 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3395 this = self._parse_type() or self._parse_id_var() 3396 self._match(TokenType.COLON) 3397 return self._parse_column_def(this) 3398 3399 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3400 if not self._match_text_seq("AT", "TIME", "ZONE"): 3401 return this 3402 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3403 3404 def _parse_column(self) -> t.Optional[exp.Expression]: 3405 this = self._parse_field() 3406 if isinstance(this, exp.Identifier): 3407 this = self.expression(exp.Column, this=this) 3408 elif not this: 3409 return self._parse_bracket(this) 3410 return self._parse_column_ops(this) 3411 3412 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3413 this = self._parse_bracket(this) 3414 3415 while self._match_set(self.COLUMN_OPERATORS): 3416 op_token = self._prev.token_type 3417 op = self.COLUMN_OPERATORS.get(op_token) 3418 3419 if op_token == TokenType.DCOLON: 3420 field = self._parse_types() 3421 if not field: 3422 self.raise_error("Expected type") 3423 elif 
op and self._curr: 3424 self._advance() 3425 value = self._prev.text 3426 field = ( 3427 exp.Literal.number(value) 3428 if self._prev.token_type == TokenType.NUMBER 3429 else exp.Literal.string(value) 3430 ) 3431 else: 3432 field = self._parse_field(anonymous_func=True, any_token=True) 3433 3434 if isinstance(field, exp.Func): 3435 # bigquery allows function calls like x.y.count(...) 3436 # SAFE.SUBSTR(...) 3437 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3438 this = self._replace_columns_with_dots(this) 3439 3440 if op: 3441 this = op(self, this, field) 3442 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3443 this = self.expression( 3444 exp.Column, 3445 this=field, 3446 table=this.this, 3447 db=this.args.get("table"), 3448 catalog=this.args.get("db"), 3449 ) 3450 else: 3451 this = self.expression(exp.Dot, this=this, expression=field) 3452 this = self._parse_bracket(this) 3453 return this 3454 3455 def _parse_primary(self) -> t.Optional[exp.Expression]: 3456 if self._match_set(self.PRIMARY_PARSERS): 3457 token_type = self._prev.token_type 3458 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3459 3460 if token_type == TokenType.STRING: 3461 expressions = [primary] 3462 while self._match(TokenType.STRING): 3463 expressions.append(exp.Literal.string(self._prev.text)) 3464 3465 if len(expressions) > 1: 3466 return self.expression(exp.Concat, expressions=expressions) 3467 3468 return primary 3469 3470 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3471 return exp.Literal.number(f"0.{self._prev.text}") 3472 3473 if self._match(TokenType.L_PAREN): 3474 comments = self._prev_comments 3475 query = self._parse_select() 3476 3477 if query: 3478 expressions = [query] 3479 else: 3480 expressions = self._parse_expressions() 3481 3482 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3483 3484 if isinstance(this, exp.Subqueryable): 3485 this = 
self._parse_set_operations( 3486 self._parse_subquery(this=this, parse_alias=False) 3487 ) 3488 elif len(expressions) > 1: 3489 this = self.expression(exp.Tuple, expressions=expressions) 3490 else: 3491 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3492 3493 if this: 3494 this.add_comments(comments) 3495 3496 self._match_r_paren(expression=this) 3497 return this 3498 3499 return None 3500 3501 def _parse_field( 3502 self, 3503 any_token: bool = False, 3504 tokens: t.Optional[t.Collection[TokenType]] = None, 3505 anonymous_func: bool = False, 3506 ) -> t.Optional[exp.Expression]: 3507 return ( 3508 self._parse_primary() 3509 or self._parse_function(anonymous=anonymous_func) 3510 or self._parse_id_var(any_token=any_token, tokens=tokens) 3511 ) 3512 3513 def _parse_function( 3514 self, 3515 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3516 anonymous: bool = False, 3517 optional_parens: bool = True, 3518 ) -> t.Optional[exp.Expression]: 3519 if not self._curr: 3520 return None 3521 3522 token_type = self._curr.token_type 3523 this = self._curr.text 3524 upper = this.upper() 3525 3526 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3527 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3528 self._advance() 3529 return parser(self) 3530 3531 if not self._next or self._next.token_type != TokenType.L_PAREN: 3532 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3533 self._advance() 3534 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3535 3536 return None 3537 3538 if token_type not in self.FUNC_TOKENS: 3539 return None 3540 3541 self._advance(2) 3542 3543 parser = self.FUNCTION_PARSERS.get(upper) 3544 if parser and not anonymous: 3545 this = parser(self) 3546 else: 3547 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3548 3549 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3550 this = self.expression(subquery_predicate, 
this=self._parse_select()) 3551 self._match_r_paren() 3552 return this 3553 3554 if functions is None: 3555 functions = self.FUNCTIONS 3556 3557 function = functions.get(upper) 3558 3559 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3560 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3561 3562 if function and not anonymous: 3563 func = self.validate_expression(function(args), args) 3564 if not self.NORMALIZE_FUNCTIONS: 3565 func.meta["name"] = this 3566 this = func 3567 else: 3568 this = self.expression(exp.Anonymous, this=this, expressions=args) 3569 3570 self._match_r_paren(this) 3571 return self._parse_window(this) 3572 3573 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3574 return self._parse_column_def(self._parse_id_var()) 3575 3576 def _parse_user_defined_function( 3577 self, kind: t.Optional[TokenType] = None 3578 ) -> t.Optional[exp.Expression]: 3579 this = self._parse_id_var() 3580 3581 while self._match(TokenType.DOT): 3582 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3583 3584 if not self._match(TokenType.L_PAREN): 3585 return this 3586 3587 expressions = self._parse_csv(self._parse_function_parameter) 3588 self._match_r_paren() 3589 return self.expression( 3590 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3591 ) 3592 3593 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3594 literal = self._parse_primary() 3595 if literal: 3596 return self.expression(exp.Introducer, this=token.text, expression=literal) 3597 3598 return self.expression(exp.Identifier, this=token.text) 3599 3600 def _parse_session_parameter(self) -> exp.SessionParameter: 3601 kind = None 3602 this = self._parse_id_var() or self._parse_primary() 3603 3604 if this and self._match(TokenType.DOT): 3605 kind = this.name 3606 this = self._parse_var() or self._parse_primary() 3607 3608 return self.expression(exp.SessionParameter, this=this, kind=kind) 3609 3610 def 
_parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3611 index = self._index 3612 3613 if self._match(TokenType.L_PAREN): 3614 expressions = t.cast( 3615 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3616 ) 3617 3618 if not self._match(TokenType.R_PAREN): 3619 self._retreat(index) 3620 else: 3621 expressions = [self._parse_id_var()] 3622 3623 if self._match_set(self.LAMBDAS): 3624 return self.LAMBDAS[self._prev.token_type](self, expressions) 3625 3626 self._retreat(index) 3627 3628 this: t.Optional[exp.Expression] 3629 3630 if self._match(TokenType.DISTINCT): 3631 this = self.expression( 3632 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3633 ) 3634 else: 3635 this = self._parse_select_or_expression(alias=alias) 3636 3637 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3638 3639 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3640 index = self._index 3641 3642 if not self.errors: 3643 try: 3644 if self._parse_select(nested=True): 3645 return this 3646 except ParseError: 3647 pass 3648 finally: 3649 self.errors.clear() 3650 self._retreat(index) 3651 3652 if not self._match(TokenType.L_PAREN): 3653 return this 3654 3655 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3656 3657 self._match_r_paren() 3658 return self.expression(exp.Schema, this=this, expressions=args) 3659 3660 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3661 return self._parse_column_def(self._parse_field(any_token=True)) 3662 3663 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3664 # column defs are not really columns, they're identifiers 3665 if isinstance(this, exp.Column): 3666 this = this.this 3667 3668 kind = self._parse_types(schema=True) 3669 3670 if self._match_text_seq("FOR", "ORDINALITY"): 3671 return self.expression(exp.ColumnDef, this=this, 
ordinality=True) 3672 3673 constraints: t.List[exp.Expression] = [] 3674 3675 if not kind and self._match(TokenType.ALIAS): 3676 constraints.append( 3677 self.expression( 3678 exp.ComputedColumnConstraint, 3679 this=self._parse_conjunction(), 3680 persisted=self._match_text_seq("PERSISTED"), 3681 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3682 ) 3683 ) 3684 3685 while True: 3686 constraint = self._parse_column_constraint() 3687 if not constraint: 3688 break 3689 constraints.append(constraint) 3690 3691 if not kind and not constraints: 3692 return this 3693 3694 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3695 3696 def _parse_auto_increment( 3697 self, 3698 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3699 start = None 3700 increment = None 3701 3702 if self._match(TokenType.L_PAREN, advance=False): 3703 args = self._parse_wrapped_csv(self._parse_bitwise) 3704 start = seq_get(args, 0) 3705 increment = seq_get(args, 1) 3706 elif self._match_text_seq("START"): 3707 start = self._parse_bitwise() 3708 self._match_text_seq("INCREMENT") 3709 increment = self._parse_bitwise() 3710 3711 if start and increment: 3712 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3713 3714 return exp.AutoIncrementColumnConstraint() 3715 3716 def _parse_compress(self) -> exp.CompressColumnConstraint: 3717 if self._match(TokenType.L_PAREN, advance=False): 3718 return self.expression( 3719 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3720 ) 3721 3722 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3723 3724 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3725 if self._match_text_seq("BY", "DEFAULT"): 3726 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3727 this = self.expression( 3728 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3729 ) 
3730 else: 3731 self._match_text_seq("ALWAYS") 3732 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3733 3734 self._match(TokenType.ALIAS) 3735 identity = self._match_text_seq("IDENTITY") 3736 3737 if self._match(TokenType.L_PAREN): 3738 if self._match(TokenType.START_WITH): 3739 this.set("start", self._parse_bitwise()) 3740 if self._match_text_seq("INCREMENT", "BY"): 3741 this.set("increment", self._parse_bitwise()) 3742 if self._match_text_seq("MINVALUE"): 3743 this.set("minvalue", self._parse_bitwise()) 3744 if self._match_text_seq("MAXVALUE"): 3745 this.set("maxvalue", self._parse_bitwise()) 3746 3747 if self._match_text_seq("CYCLE"): 3748 this.set("cycle", True) 3749 elif self._match_text_seq("NO", "CYCLE"): 3750 this.set("cycle", False) 3751 3752 if not identity: 3753 this.set("expression", self._parse_bitwise()) 3754 3755 self._match_r_paren() 3756 3757 return this 3758 3759 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3760 self._match_text_seq("LENGTH") 3761 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3762 3763 def _parse_not_constraint( 3764 self, 3765 ) -> t.Optional[exp.Expression]: 3766 if self._match_text_seq("NULL"): 3767 return self.expression(exp.NotNullColumnConstraint) 3768 if self._match_text_seq("CASESPECIFIC"): 3769 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3770 if self._match_text_seq("FOR", "REPLICATION"): 3771 return self.expression(exp.NotForReplicationColumnConstraint) 3772 return None 3773 3774 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3775 if self._match(TokenType.CONSTRAINT): 3776 this = self._parse_id_var() 3777 else: 3778 this = None 3779 3780 if self._match_texts(self.CONSTRAINT_PARSERS): 3781 return self.expression( 3782 exp.ColumnConstraint, 3783 this=this, 3784 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3785 ) 3786 3787 return this 3788 3789 def _parse_constraint(self) -> 
t.Optional[exp.Expression]: 3790 if not self._match(TokenType.CONSTRAINT): 3791 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3792 3793 this = self._parse_id_var() 3794 expressions = [] 3795 3796 while True: 3797 constraint = self._parse_unnamed_constraint() or self._parse_function() 3798 if not constraint: 3799 break 3800 expressions.append(constraint) 3801 3802 return self.expression(exp.Constraint, this=this, expressions=expressions) 3803 3804 def _parse_unnamed_constraint( 3805 self, constraints: t.Optional[t.Collection[str]] = None 3806 ) -> t.Optional[exp.Expression]: 3807 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3808 return None 3809 3810 constraint = self._prev.text.upper() 3811 if constraint not in self.CONSTRAINT_PARSERS: 3812 self.raise_error(f"No parser found for schema constraint {constraint}.") 3813 3814 return self.CONSTRAINT_PARSERS[constraint](self) 3815 3816 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3817 self._match_text_seq("KEY") 3818 return self.expression( 3819 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3820 ) 3821 3822 def _parse_key_constraint_options(self) -> t.List[str]: 3823 options = [] 3824 while True: 3825 if not self._curr: 3826 break 3827 3828 if self._match(TokenType.ON): 3829 action = None 3830 on = self._advance_any() and self._prev.text 3831 3832 if self._match_text_seq("NO", "ACTION"): 3833 action = "NO ACTION" 3834 elif self._match_text_seq("CASCADE"): 3835 action = "CASCADE" 3836 elif self._match_pair(TokenType.SET, TokenType.NULL): 3837 action = "SET NULL" 3838 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3839 action = "SET DEFAULT" 3840 else: 3841 self.raise_error("Invalid key constraint") 3842 3843 options.append(f"ON {on} {action}") 3844 elif self._match_text_seq("NOT", "ENFORCED"): 3845 options.append("NOT ENFORCED") 3846 elif self._match_text_seq("DEFERRABLE"): 3847 
options.append("DEFERRABLE") 3848 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3849 options.append("INITIALLY DEFERRED") 3850 elif self._match_text_seq("NORELY"): 3851 options.append("NORELY") 3852 elif self._match_text_seq("MATCH", "FULL"): 3853 options.append("MATCH FULL") 3854 else: 3855 break 3856 3857 return options 3858 3859 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3860 if match and not self._match(TokenType.REFERENCES): 3861 return None 3862 3863 expressions = None 3864 this = self._parse_table(schema=True) 3865 options = self._parse_key_constraint_options() 3866 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3867 3868 def _parse_foreign_key(self) -> exp.ForeignKey: 3869 expressions = self._parse_wrapped_id_vars() 3870 reference = self._parse_references() 3871 options = {} 3872 3873 while self._match(TokenType.ON): 3874 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3875 self.raise_error("Expected DELETE or UPDATE") 3876 3877 kind = self._prev.text.lower() 3878 3879 if self._match_text_seq("NO", "ACTION"): 3880 action = "NO ACTION" 3881 elif self._match(TokenType.SET): 3882 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3883 action = "SET " + self._prev.text.upper() 3884 else: 3885 self._advance() 3886 action = self._prev.text.upper() 3887 3888 options[kind] = action 3889 3890 return self.expression( 3891 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3892 ) 3893 3894 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3895 return self._parse_field() 3896 3897 def _parse_primary_key( 3898 self, wrapped_optional: bool = False, in_props: bool = False 3899 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3900 desc = ( 3901 self._match_set((TokenType.ASC, TokenType.DESC)) 3902 and self._prev.token_type == TokenType.DESC 3903 ) 3904 3905 if not in_props and not self._match(TokenType.L_PAREN, 
advance=False): 3906 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3907 3908 expressions = self._parse_wrapped_csv( 3909 self._parse_primary_key_part, optional=wrapped_optional 3910 ) 3911 options = self._parse_key_constraint_options() 3912 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3913 3914 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3915 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3916 return this 3917 3918 bracket_kind = self._prev.token_type 3919 3920 if self._match(TokenType.COLON): 3921 expressions: t.List[exp.Expression] = [ 3922 self.expression(exp.Slice, expression=self._parse_conjunction()) 3923 ] 3924 else: 3925 expressions = self._parse_csv( 3926 lambda: self._parse_slice( 3927 self._parse_alias(self._parse_conjunction(), explicit=True) 3928 ) 3929 ) 3930 3931 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3932 if bracket_kind == TokenType.L_BRACE: 3933 this = self.expression(exp.Struct, expressions=expressions) 3934 elif not this or this.name.upper() == "ARRAY": 3935 this = self.expression(exp.Array, expressions=expressions) 3936 else: 3937 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3938 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3939 3940 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3941 self.raise_error("Expected ]") 3942 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3943 self.raise_error("Expected }") 3944 3945 self._add_comments(this) 3946 return self._parse_bracket(this) 3947 3948 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3949 if self._match(TokenType.COLON): 3950 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3951 return this 3952 3953 def _parse_case(self) -> t.Optional[exp.Expression]: 3954 
ifs = [] 3955 default = None 3956 3957 comments = self._prev_comments 3958 expression = self._parse_conjunction() 3959 3960 while self._match(TokenType.WHEN): 3961 this = self._parse_conjunction() 3962 self._match(TokenType.THEN) 3963 then = self._parse_conjunction() 3964 ifs.append(self.expression(exp.If, this=this, true=then)) 3965 3966 if self._match(TokenType.ELSE): 3967 default = self._parse_conjunction() 3968 3969 if not self._match(TokenType.END): 3970 self.raise_error("Expected END after CASE", self._prev) 3971 3972 return self._parse_window( 3973 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3974 ) 3975 3976 def _parse_if(self) -> t.Optional[exp.Expression]: 3977 if self._match(TokenType.L_PAREN): 3978 args = self._parse_csv(self._parse_conjunction) 3979 this = self.validate_expression(exp.If.from_arg_list(args), args) 3980 self._match_r_paren() 3981 else: 3982 index = self._index - 1 3983 condition = self._parse_conjunction() 3984 3985 if not condition: 3986 self._retreat(index) 3987 return None 3988 3989 self._match(TokenType.THEN) 3990 true = self._parse_conjunction() 3991 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3992 self._match(TokenType.END) 3993 this = self.expression(exp.If, this=condition, true=true, false=false) 3994 3995 return self._parse_window(this) 3996 3997 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3998 if not self._match_text_seq("VALUE", "FOR"): 3999 self._retreat(self._index - 1) 4000 return None 4001 4002 return self.expression( 4003 exp.NextValueFor, 4004 this=self._parse_column(), 4005 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4006 ) 4007 4008 def _parse_extract(self) -> exp.Extract: 4009 this = self._parse_function() or self._parse_var() or self._parse_type() 4010 4011 if self._match(TokenType.FROM): 4012 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4013 4014 if not 
self._match(TokenType.COMMA): 4015 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4016 4017 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4018 4019 def _parse_any_value(self) -> exp.AnyValue: 4020 this = self._parse_lambda() 4021 is_max = None 4022 having = None 4023 4024 if self._match(TokenType.HAVING): 4025 self._match_texts(("MAX", "MIN")) 4026 is_max = self._prev.text == "MAX" 4027 having = self._parse_column() 4028 4029 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4030 4031 def _parse_cast(self, strict: bool) -> exp.Expression: 4032 this = self._parse_conjunction() 4033 4034 if not self._match(TokenType.ALIAS): 4035 if self._match(TokenType.COMMA): 4036 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4037 4038 self.raise_error("Expected AS after CAST") 4039 4040 fmt = None 4041 to = self._parse_types() 4042 4043 if not to: 4044 self.raise_error("Expected TYPE after CAST") 4045 elif isinstance(to, exp.Identifier): 4046 to = exp.DataType.build(to.name, udt=True) 4047 elif to.this == exp.DataType.Type.CHAR: 4048 if self._match(TokenType.CHARACTER_SET): 4049 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4050 elif self._match(TokenType.FORMAT): 4051 fmt_string = self._parse_string() 4052 fmt = self._parse_at_time_zone(fmt_string) 4053 4054 if to.this in exp.DataType.TEMPORAL_TYPES: 4055 this = self.expression( 4056 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4057 this=this, 4058 format=exp.Literal.string( 4059 format_time( 4060 fmt_string.this if fmt_string else "", 4061 self.FORMAT_MAPPING or self.TIME_MAPPING, 4062 self.FORMAT_TRIE or self.TIME_TRIE, 4063 ) 4064 ), 4065 ) 4066 4067 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4068 this.set("zone", fmt.args["zone"]) 4069 4070 return this 4071 4072 return self.expression(exp.Cast if strict else exp.TryCast, this=this, 
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the argument list of CONCAT(...)."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(<delimiter>, <value>, ...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            # Not enough arguments to split off a delimiter; keep as-is.
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # NULL coalescing applies only to the values, never to the delimiter.
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregates, incl. WITHIN GROUP."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>).

        Args:
            strict: produce exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
4150 """ 4151 args = self._parse_csv(self._parse_conjunction) 4152 4153 if len(args) < 3: 4154 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4155 4156 expression, *expressions = args 4157 if not expression: 4158 return None 4159 4160 ifs = [] 4161 for search, result in zip(expressions[::2], expressions[1::2]): 4162 if not search or not result: 4163 return None 4164 4165 if isinstance(search, exp.Literal): 4166 ifs.append( 4167 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4168 ) 4169 elif isinstance(search, exp.Null): 4170 ifs.append( 4171 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4172 ) 4173 else: 4174 cond = exp.or_( 4175 exp.EQ(this=expression.copy(), expression=search), 4176 exp.and_( 4177 exp.Is(this=expression.copy(), expression=exp.Null()), 4178 exp.Is(this=search.copy(), expression=exp.Null()), 4179 copy=False, 4180 ), 4181 copy=False, 4182 ) 4183 ifs.append(exp.If(this=cond, true=result)) 4184 4185 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4186 4187 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4188 self._match_text_seq("KEY") 4189 key = self._parse_column() 4190 self._match_set((TokenType.COLON, TokenType.COMMA)) 4191 self._match_text_seq("VALUE") 4192 value = self._parse_bitwise() 4193 4194 if not key and not value: 4195 return None 4196 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4197 4198 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4199 if not this or not self._match_text_seq("FORMAT", "JSON"): 4200 return this 4201 4202 return self.expression(exp.FormatJson, this=this) 4203 4204 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4205 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4206 for value in values: 4207 if self._match_text_seq(value, "ON", on): 4208 return f"{value} ON {on}" 4209 4210 return None 4211 4212 def _parse_json_object(self) -> exp.JSONObject: 4213 star = self._parse_star() 4214 expressions = ( 4215 [star] 4216 if star 4217 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4218 ) 4219 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4220 4221 unique_keys = None 4222 if self._match_text_seq("WITH", "UNIQUE"): 4223 unique_keys = True 4224 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4225 unique_keys = False 4226 4227 self._match_text_seq("KEYS") 4228 4229 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4230 self._parse_type() 4231 ) 4232 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4233 4234 return self.expression( 4235 exp.JSONObject, 4236 expressions=expressions, 4237 null_handling=null_handling, 4238 unique_keys=unique_keys, 4239 return_type=return_type, 4240 encoding=encoding, 4241 ) 4242 4243 def _parse_logarithm(self) -> exp.Func: 4244 # Default argument order is base, expression 4245 args = self._parse_csv(self._parse_range) 4246 4247 if len(args) > 1: 4248 if not self.LOG_BASE_FIRST: 4249 args.reverse() 4250 return exp.Log.from_arg_list(args) 4251 4252 return self.expression( 4253 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4254 ) 4255 4256 def _parse_match_against(self) -> exp.MatchAgainst: 4257 expressions = self._parse_csv(self._parse_column) 4258 4259 self._match_text_seq(")", "AGAINST", "(") 4260 4261 this = self._parse_string() 4262 4263 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4264 modifier = "IN NATURAL LANGUAGE MODE" 4265 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4266 modifier = f"{modifier} WITH QUERY EXPANSION" 4267 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4268 modifier = "IN BOOLEAN MODE" 4269 elif 
self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4270 modifier = "WITH QUERY EXPANSION" 4271 else: 4272 modifier = None 4273 4274 return self.expression( 4275 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4276 ) 4277 4278 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4279 def _parse_open_json(self) -> exp.OpenJSON: 4280 this = self._parse_bitwise() 4281 path = self._match(TokenType.COMMA) and self._parse_string() 4282 4283 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4284 this = self._parse_field(any_token=True) 4285 kind = self._parse_types() 4286 path = self._parse_string() 4287 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4288 4289 return self.expression( 4290 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4291 ) 4292 4293 expressions = None 4294 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4295 self._match_l_paren() 4296 expressions = self._parse_csv(_parse_open_json_column_def) 4297 4298 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4299 4300 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4301 args = self._parse_csv(self._parse_bitwise) 4302 4303 if self._match(TokenType.IN): 4304 return self.expression( 4305 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4306 ) 4307 4308 if haystack_first: 4309 haystack = seq_get(args, 0) 4310 needle = seq_get(args, 1) 4311 else: 4312 needle = seq_get(args, 0) 4313 haystack = seq_get(args, 1) 4314 4315 return self.expression( 4316 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4317 ) 4318 4319 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4320 args = self._parse_csv(self._parse_table) 4321 return exp.JoinHint(this=func_name.upper(), expressions=args) 4322 4323 def _parse_substring(self) -> exp.Substring: 4324 # Postgres supports the form: 
substring(string [from int] [for int]) 4325 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4326 4327 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4328 4329 if self._match(TokenType.FROM): 4330 args.append(self._parse_bitwise()) 4331 if self._match(TokenType.FOR): 4332 args.append(self._parse_bitwise()) 4333 4334 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4335 4336 def _parse_trim(self) -> exp.Trim: 4337 # https://www.w3resource.com/sql/character-functions/trim.php 4338 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4339 4340 position = None 4341 collation = None 4342 4343 if self._match_texts(self.TRIM_TYPES): 4344 position = self._prev.text.upper() 4345 4346 expression = self._parse_bitwise() 4347 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4348 this = self._parse_bitwise() 4349 else: 4350 this = expression 4351 expression = None 4352 4353 if self._match(TokenType.COLLATE): 4354 collation = self._parse_bitwise() 4355 4356 return self.expression( 4357 exp.Trim, this=this, position=position, expression=expression, collation=collation 4358 ) 4359 4360 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4361 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4362 4363 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4364 return self._parse_window(self._parse_id_var(), alias=True) 4365 4366 def _parse_respect_or_ignore_nulls( 4367 self, this: t.Optional[exp.Expression] 4368 ) -> t.Optional[exp.Expression]: 4369 if self._match_text_seq("IGNORE", "NULLS"): 4370 return self.expression(exp.IgnoreNulls, this=this) 4371 if self._match_text_seq("RESPECT", "NULLS"): 4372 return self.expression(exp.RespectNulls, this=this) 4373 return this 4374 4375 def _parse_window( 4376 self, this: t.Optional[exp.Expression], alias: bool = False 4377 ) -> t.Optional[exp.Expression]: 4378 if 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the trailing window syntax after a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS, and OVER (<window spec>).

        Args:
            this: the expression the window attaches to.
            alias: True when parsing a named window (WINDOW <name> AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like token follows: there is no window here.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (a reference to a named window)
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window specification."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint (UNBOUNDED / CURRENT ROW / <expr>) and side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (AS <name> or bare <name>) after `this`.

        Args:
            explicit: when True, only accept aliases introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases: AS (a, b, c)
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like keyword token.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: token types to accept instead of the default ID_VAR_TOKENS.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None
    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved token / one of `tokens`) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a VAR or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces: @x or @{x}."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (?, :name, @name, ...) via PLACEHOLDER_PARSERS."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined; give back the consumed token.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT * EXCEPT (...) column list (parens optional)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT * REPLACE (...) expression list (parens optional)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: map matched tokens to expression types."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (...) — raising if parens are required but missing."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a full SELECT or a (possibly aliased) scalar expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<mode>, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens (e.g. "ISOLATION LEVEL SERIALIZABLE").
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT <x>] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK token itself was already consumed (it is self._prev).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER x]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] <name>, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION (...), [PARTITION (...)]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / ADD FOREIGN KEY / ADD PRIMARY KEY action.

        The introducing token was already consumed (it is self._prev).
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            # Dialects where ADD takes bare column defs without the COLUMN keyword.
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT <e> |
        [SET DATA] TYPE <t> [COLLATE <c>] [USING <e>]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement, falling back to a raw exp.Command when
        the action is unknown or there is trailing unparsed input."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if everything was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)
    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ... ."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None if absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else fall back to a raw command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> { = | TO } <value>."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic>, ... ."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET/UNSET statement; fall back to a raw command on leftover input."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the (possibly multi-word) `options` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL into exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse dictionary property: <this>(<kind>[(<key> <value> ...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once no further key/value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a ClickHouse dictionary RANGE(MIN <x> MAX <y>) clause.

        When MIN is omitted, it defaults to the literal 0.
        """
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
        """Parse a list comprehension tail: <expr> FOR <x> IN <iter> [IF <cond>]."""
        expression = self._parse_column()
        self._match(TokenType.IN)
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser for a (possibly multi-word) keyword sequence via `trie`.

        Rewinds the token stream if no complete match is found.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (consuming the token if `advance`) when the current token
        has type `token_type`; attach pending comments to `expression` if given."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (consuming if `advance`) when the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (consuming both if `advance`) when the next two tokens match."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a '(' or raise a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a ')' or raise a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True (consuming if `advance`) when the current token's upper-cased
        text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Match a sequence of upper-cased token texts, rewinding entirely on failure
        (and also when `advance` is False)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
5120 5121 def _replace_columns_with_dots(self, this): 5122 if isinstance(this, exp.Dot): 5123 exp.replace_children(this, self._replace_columns_with_dots) 5124 elif isinstance(this, exp.Column): 5125 exp.replace_children(this, self._replace_columns_with_dots) 5126 table = this.args.get("table") 5127 this = ( 5128 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5129 ) 5130 5131 return this 5132 5133 def _replace_lambda( 5134 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5135 ) -> t.Optional[exp.Expression]: 5136 if not node: 5137 return node 5138 5139 for column in node.find_all(exp.Column): 5140 if column.parts[0].name in lambda_variables: 5141 dot_or_id = column.to_dot() if column.table else column.this 5142 parent = column.parent 5143 5144 while isinstance(parent, exp.Dot): 5145 if not isinstance(parent.parent, exp.Dot): 5146 parent.replace(dot_or_id) 5147 break 5148 parent = parent.parent 5149 else: 5150 if column is node: 5151 node = dot_or_id 5152 else: 5153 column.replace(dot_or_id) 5154 return node 5155 5156 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5157 return [ 5158 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5159 for value in values 5160 if value 5161 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from alternating key/value arguments.

    A single star argument yields a StarMap; otherwise the arguments are
    consumed strictly in (key, value) pairs, so an odd argument count raises
    IndexError.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 
213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 } 220 221 SUBQUERY_PREDICATES = { 222 TokenType.ANY: exp.Any, 223 TokenType.ALL: exp.All, 224 TokenType.EXISTS: exp.Exists, 225 TokenType.SOME: exp.Any, 226 } 227 228 RESERVED_KEYWORDS = { 229 *Tokenizer.SINGLE_TOKENS.values(), 230 TokenType.SELECT, 231 } 232 233 DB_CREATABLES = { 234 TokenType.DATABASE, 235 TokenType.SCHEMA, 236 TokenType.TABLE, 237 TokenType.VIEW, 238 TokenType.DICTIONARY, 239 } 240 241 CREATABLES = { 242 TokenType.COLUMN, 243 TokenType.FUNCTION, 244 TokenType.INDEX, 245 TokenType.PROCEDURE, 246 *DB_CREATABLES, 247 } 248 249 # Tokens that can represent identifiers 250 ID_VAR_TOKENS = { 251 TokenType.VAR, 252 TokenType.ANTI, 253 TokenType.APPLY, 254 TokenType.ASC, 255 TokenType.AUTO_INCREMENT, 256 TokenType.BEGIN, 257 TokenType.CACHE, 258 TokenType.CASE, 259 TokenType.COLLATE, 260 TokenType.COMMAND, 261 TokenType.COMMENT, 262 TokenType.COMMIT, 263 TokenType.CONSTRAINT, 264 TokenType.DEFAULT, 265 TokenType.DELETE, 266 TokenType.DESC, 267 TokenType.DESCRIBE, 268 TokenType.DICTIONARY, 269 TokenType.DIV, 270 TokenType.END, 271 TokenType.EXECUTE, 272 TokenType.ESCAPE, 273 TokenType.FALSE, 274 TokenType.FIRST, 275 TokenType.FILTER, 276 TokenType.FORMAT, 277 TokenType.FULL, 278 TokenType.IS, 279 TokenType.ISNULL, 280 TokenType.INTERVAL, 281 TokenType.KEEP, 282 TokenType.LEFT, 283 TokenType.LOAD, 284 TokenType.MERGE, 285 TokenType.NATURAL, 286 TokenType.NEXT, 287 TokenType.OFFSET, 288 TokenType.ORDINALITY, 289 TokenType.OVERWRITE, 290 TokenType.PARTITION, 291 TokenType.PERCENT, 292 TokenType.PIVOT, 293 TokenType.PRAGMA, 294 TokenType.RANGE, 295 TokenType.REFERENCES, 296 TokenType.RIGHT, 297 TokenType.ROW, 298 TokenType.ROWS, 299 TokenType.SEMI, 300 TokenType.SET, 301 TokenType.SETTINGS, 302 TokenType.SHOW, 
303 TokenType.TEMPORARY, 304 TokenType.TOP, 305 TokenType.TRUE, 306 TokenType.UNIQUE, 307 TokenType.UNPIVOT, 308 TokenType.UPDATE, 309 TokenType.VOLATILE, 310 TokenType.WINDOW, 311 *CREATABLES, 312 *SUBQUERY_PREDICATES, 313 *TYPE_TOKENS, 314 *NO_PAREN_FUNCTIONS, 315 } 316 317 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 318 319 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 320 TokenType.APPLY, 321 TokenType.ASOF, 322 TokenType.FULL, 323 TokenType.LEFT, 324 TokenType.LOCK, 325 TokenType.NATURAL, 326 TokenType.OFFSET, 327 TokenType.RIGHT, 328 TokenType.WINDOW, 329 } 330 331 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 332 333 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 334 335 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 336 337 FUNC_TOKENS = { 338 TokenType.COMMAND, 339 TokenType.CURRENT_DATE, 340 TokenType.CURRENT_DATETIME, 341 TokenType.CURRENT_TIMESTAMP, 342 TokenType.CURRENT_TIME, 343 TokenType.CURRENT_USER, 344 TokenType.FILTER, 345 TokenType.FIRST, 346 TokenType.FORMAT, 347 TokenType.GLOB, 348 TokenType.IDENTIFIER, 349 TokenType.INDEX, 350 TokenType.ISNULL, 351 TokenType.ILIKE, 352 TokenType.INSERT, 353 TokenType.LIKE, 354 TokenType.MERGE, 355 TokenType.OFFSET, 356 TokenType.PRIMARY_KEY, 357 TokenType.RANGE, 358 TokenType.REPLACE, 359 TokenType.RLIKE, 360 TokenType.ROW, 361 TokenType.UNNEST, 362 TokenType.VAR, 363 TokenType.LEFT, 364 TokenType.RIGHT, 365 TokenType.DATE, 366 TokenType.DATETIME, 367 TokenType.TABLE, 368 TokenType.TIMESTAMP, 369 TokenType.TIMESTAMPTZ, 370 TokenType.WINDOW, 371 TokenType.XOR, 372 *TYPE_TOKENS, 373 *SUBQUERY_PREDICATES, 374 } 375 376 CONJUNCTION = { 377 TokenType.AND: exp.And, 378 TokenType.OR: exp.Or, 379 } 380 381 EQUALITY = { 382 TokenType.EQ: exp.EQ, 383 TokenType.NEQ: exp.NEQ, 384 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 385 } 386 387 COMPARISON = { 388 TokenType.GT: exp.GT, 389 TokenType.GTE: exp.GTE, 390 TokenType.LT: exp.LT, 391 TokenType.LTE: exp.LTE, 392 } 393 394 BITWISE = { 395 
TokenType.AMP: exp.BitwiseAnd, 396 TokenType.CARET: exp.BitwiseXor, 397 TokenType.PIPE: exp.BitwiseOr, 398 TokenType.DPIPE: exp.DPipe, 399 } 400 401 TERM = { 402 TokenType.DASH: exp.Sub, 403 TokenType.PLUS: exp.Add, 404 TokenType.MOD: exp.Mod, 405 TokenType.COLLATE: exp.Collate, 406 } 407 408 FACTOR = { 409 TokenType.DIV: exp.IntDiv, 410 TokenType.LR_ARROW: exp.Distance, 411 TokenType.SLASH: exp.Div, 412 TokenType.STAR: exp.Mul, 413 } 414 415 TIMES = { 416 TokenType.TIME, 417 TokenType.TIMETZ, 418 } 419 420 TIMESTAMPS = { 421 TokenType.TIMESTAMP, 422 TokenType.TIMESTAMPTZ, 423 TokenType.TIMESTAMPLTZ, 424 *TIMES, 425 } 426 427 SET_OPERATIONS = { 428 TokenType.UNION, 429 TokenType.INTERSECT, 430 TokenType.EXCEPT, 431 } 432 433 JOIN_METHODS = { 434 TokenType.NATURAL, 435 TokenType.ASOF, 436 } 437 438 JOIN_SIDES = { 439 TokenType.LEFT, 440 TokenType.RIGHT, 441 TokenType.FULL, 442 } 443 444 JOIN_KINDS = { 445 TokenType.INNER, 446 TokenType.OUTER, 447 TokenType.CROSS, 448 TokenType.SEMI, 449 TokenType.ANTI, 450 } 451 452 JOIN_HINTS: t.Set[str] = set() 453 454 LAMBDAS = { 455 TokenType.ARROW: lambda self, expressions: self.expression( 456 exp.Lambda, 457 this=self._replace_lambda( 458 self._parse_conjunction(), 459 {node.name for node in expressions}, 460 ), 461 expressions=expressions, 462 ), 463 TokenType.FARROW: lambda self, expressions: self.expression( 464 exp.Kwarg, 465 this=exp.var(expressions[0].name), 466 expression=self._parse_conjunction(), 467 ), 468 } 469 470 COLUMN_OPERATORS = { 471 TokenType.DOT: None, 472 TokenType.DCOLON: lambda self, this, to: self.expression( 473 exp.Cast if self.STRICT_CAST else exp.TryCast, 474 this=this, 475 to=to, 476 ), 477 TokenType.ARROW: lambda self, this, path: self.expression( 478 exp.JSONExtract, 479 this=this, 480 expression=path, 481 ), 482 TokenType.DARROW: lambda self, this, path: self.expression( 483 exp.JSONExtractScalar, 484 this=this, 485 expression=path, 486 ), 487 TokenType.HASH_ARROW: lambda self, this, path: 
self.expression( 488 exp.JSONBExtract, 489 this=this, 490 expression=path, 491 ), 492 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 493 exp.JSONBExtractScalar, 494 this=this, 495 expression=path, 496 ), 497 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 498 exp.JSONBContains, 499 this=this, 500 expression=key, 501 ), 502 } 503 504 EXPRESSION_PARSERS = { 505 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 506 exp.Column: lambda self: self._parse_column(), 507 exp.Condition: lambda self: self._parse_conjunction(), 508 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 509 exp.Expression: lambda self: self._parse_statement(), 510 exp.From: lambda self: self._parse_from(), 511 exp.Group: lambda self: self._parse_group(), 512 exp.Having: lambda self: self._parse_having(), 513 exp.Identifier: lambda self: self._parse_id_var(), 514 exp.Join: lambda self: self._parse_join(), 515 exp.Lambda: lambda self: self._parse_lambda(), 516 exp.Lateral: lambda self: self._parse_lateral(), 517 exp.Limit: lambda self: self._parse_limit(), 518 exp.Offset: lambda self: self._parse_offset(), 519 exp.Order: lambda self: self._parse_order(), 520 exp.Ordered: lambda self: self._parse_ordered(), 521 exp.Properties: lambda self: self._parse_properties(), 522 exp.Qualify: lambda self: self._parse_qualify(), 523 exp.Returning: lambda self: self._parse_returning(), 524 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 525 exp.Table: lambda self: self._parse_table_parts(), 526 exp.TableAlias: lambda self: self._parse_table_alias(), 527 exp.Where: lambda self: self._parse_where(), 528 exp.Window: lambda self: self._parse_named_window(), 529 exp.With: lambda self: self._parse_with(), 530 "JOIN_TYPE": lambda self: self._parse_join_parts(), 531 } 532 533 STATEMENT_PARSERS = { 534 TokenType.ALTER: lambda self: self._parse_alter(), 535 TokenType.BEGIN: lambda self: self._parse_transaction(), 536 
TokenType.CACHE: lambda self: self._parse_cache(), 537 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 538 TokenType.COMMENT: lambda self: self._parse_comment(), 539 TokenType.CREATE: lambda self: self._parse_create(), 540 TokenType.DELETE: lambda self: self._parse_delete(), 541 TokenType.DESC: lambda self: self._parse_describe(), 542 TokenType.DESCRIBE: lambda self: self._parse_describe(), 543 TokenType.DROP: lambda self: self._parse_drop(), 544 TokenType.INSERT: lambda self: self._parse_insert(), 545 TokenType.LOAD: lambda self: self._parse_load(), 546 TokenType.MERGE: lambda self: self._parse_merge(), 547 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 548 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 549 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 550 TokenType.SET: lambda self: self._parse_set(), 551 TokenType.UNCACHE: lambda self: self._parse_uncache(), 552 TokenType.UPDATE: lambda self: self._parse_update(), 553 TokenType.USE: lambda self: self.expression( 554 exp.Use, 555 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 556 and exp.var(self._prev.text), 557 this=self._parse_table(schema=False), 558 ), 559 } 560 561 UNARY_PARSERS = { 562 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 563 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 564 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 565 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 566 } 567 568 PRIMARY_PARSERS = { 569 TokenType.STRING: lambda self, token: self.expression( 570 exp.Literal, this=token.text, is_string=True 571 ), 572 TokenType.NUMBER: lambda self, token: self.expression( 573 exp.Literal, this=token.text, is_string=False 574 ), 575 TokenType.STAR: lambda self, _: self.expression( 576 exp.Star, **{"except": self._parse_except(), 
"replace": self._parse_replace()} 577 ), 578 TokenType.NULL: lambda self, _: self.expression(exp.Null), 579 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 580 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 581 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 582 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 583 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 584 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 585 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 586 exp.National, this=token.text 587 ), 588 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 589 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 590 } 591 592 PLACEHOLDER_PARSERS = { 593 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 594 TokenType.PARAMETER: lambda self: self._parse_parameter(), 595 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 596 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 597 else None, 598 } 599 600 RANGE_PARSERS = { 601 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 602 TokenType.GLOB: binary_range_parser(exp.Glob), 603 TokenType.ILIKE: binary_range_parser(exp.ILike), 604 TokenType.IN: lambda self, this: self._parse_in(this), 605 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 606 TokenType.IS: lambda self, this: self._parse_is(this), 607 TokenType.LIKE: binary_range_parser(exp.Like), 608 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 609 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 610 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 611 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 612 } 613 614 PROPERTY_PARSERS: t.Dict[str, 
t.Callable] = { 615 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 616 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 617 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 618 "CHARACTER SET": lambda self: self._parse_character_set(), 619 "CHECKSUM": lambda self: self._parse_checksum(), 620 "CLUSTER BY": lambda self: self._parse_cluster(), 621 "CLUSTERED": lambda self: self._parse_clustered_by(), 622 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 623 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 624 "COPY": lambda self: self._parse_copy_property(), 625 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 626 "DEFINER": lambda self: self._parse_definer(), 627 "DETERMINISTIC": lambda self: self.expression( 628 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 629 ), 630 "DISTKEY": lambda self: self._parse_distkey(), 631 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 632 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 633 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 634 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 635 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 636 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 637 "FREESPACE": lambda self: self._parse_freespace(), 638 "HEAP": lambda self: self.expression(exp.HeapProperty), 639 "IMMUTABLE": lambda self: self.expression( 640 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 641 ), 642 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 643 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 644 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 645 "LIFETIME": lambda self: 
self._parse_dict_range(this="LIFETIME"), 646 "LIKE": lambda self: self._parse_create_like(), 647 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 648 "LOCK": lambda self: self._parse_locking(), 649 "LOCKING": lambda self: self._parse_locking(), 650 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 651 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 652 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 653 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 654 "NO": lambda self: self._parse_no_property(), 655 "ON": lambda self: self._parse_on_property(), 656 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 657 "PARTITION BY": lambda self: self._parse_partitioned_by(), 658 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 659 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 660 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 661 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 662 "RETURNS": lambda self: self._parse_returns(), 663 "ROW": lambda self: self._parse_row(), 664 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 665 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 666 "SETTINGS": lambda self: self.expression( 667 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 668 ), 669 "SORTKEY": lambda self: self._parse_sortkey(), 670 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 671 "STABLE": lambda self: self.expression( 672 exp.StabilityProperty, this=exp.Literal.string("STABLE") 673 ), 674 "STORED": lambda self: self._parse_stored(), 675 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 676 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 677 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 678 "TO": lambda self: self._parse_to_table(), 
679 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 680 "TTL": lambda self: self._parse_ttl(), 681 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 682 "VOLATILE": lambda self: self._parse_volatile_property(), 683 "WITH": lambda self: self._parse_with_property(), 684 } 685 686 CONSTRAINT_PARSERS = { 687 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 688 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 689 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 690 "CHARACTER SET": lambda self: self.expression( 691 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 692 ), 693 "CHECK": lambda self: self.expression( 694 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 695 ), 696 "COLLATE": lambda self: self.expression( 697 exp.CollateColumnConstraint, this=self._parse_var() 698 ), 699 "COMMENT": lambda self: self.expression( 700 exp.CommentColumnConstraint, this=self._parse_string() 701 ), 702 "COMPRESS": lambda self: self._parse_compress(), 703 "CLUSTERED": lambda self: self.expression( 704 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 705 ), 706 "NONCLUSTERED": lambda self: self.expression( 707 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 708 ), 709 "DEFAULT": lambda self: self.expression( 710 exp.DefaultColumnConstraint, this=self._parse_bitwise() 711 ), 712 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 713 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 714 "FORMAT": lambda self: self.expression( 715 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 716 ), 717 "GENERATED": lambda self: self._parse_generated_as_identity(), 718 "IDENTITY": lambda self: self._parse_auto_increment(), 719 "INLINE": lambda self: self._parse_inline(), 720 "LIKE": lambda self: 
self._parse_create_like(), 721 "NOT": lambda self: self._parse_not_constraint(), 722 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 723 "ON": lambda self: ( 724 self._match(TokenType.UPDATE) 725 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 726 ) 727 or self.expression(exp.OnProperty, this=self._parse_id_var()), 728 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 729 "PRIMARY KEY": lambda self: self._parse_primary_key(), 730 "REFERENCES": lambda self: self._parse_references(match=False), 731 "TITLE": lambda self: self.expression( 732 exp.TitleColumnConstraint, this=self._parse_var_or_string() 733 ), 734 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 735 "UNIQUE": lambda self: self._parse_unique(), 736 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 737 "WITH": lambda self: self.expression( 738 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 739 ), 740 } 741 742 ALTER_PARSERS = { 743 "ADD": lambda self: self._parse_alter_table_add(), 744 "ALTER": lambda self: self._parse_alter_table_alter(), 745 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 746 "DROP": lambda self: self._parse_alter_table_drop(), 747 "RENAME": lambda self: self._parse_alter_table_rename(), 748 } 749 750 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 751 752 NO_PAREN_FUNCTION_PARSERS = { 753 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 754 "CASE": lambda self: self._parse_case(), 755 "IF": lambda self: self._parse_if(), 756 "NEXT": lambda self: self._parse_next_value_for(), 757 } 758 759 INVALID_FUNC_NAME_TOKENS = { 760 TokenType.IDENTIFIER, 761 TokenType.STRING, 762 } 763 764 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 765 766 FUNCTION_PARSERS = { 767 "ANY_VALUE": lambda self: 
self._parse_any_value(), 768 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 769 "CONCAT": lambda self: self._parse_concat(), 770 "CONCAT_WS": lambda self: self._parse_concat_ws(), 771 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 772 "DECODE": lambda self: self._parse_decode(), 773 "EXTRACT": lambda self: self._parse_extract(), 774 "JSON_OBJECT": lambda self: self._parse_json_object(), 775 "LOG": lambda self: self._parse_logarithm(), 776 "MATCH": lambda self: self._parse_match_against(), 777 "OPENJSON": lambda self: self._parse_open_json(), 778 "POSITION": lambda self: self._parse_position(), 779 "SAFE_CAST": lambda self: self._parse_cast(False), 780 "STRING_AGG": lambda self: self._parse_string_agg(), 781 "SUBSTRING": lambda self: self._parse_substring(), 782 "TRIM": lambda self: self._parse_trim(), 783 "TRY_CAST": lambda self: self._parse_cast(False), 784 "TRY_CONVERT": lambda self: self._parse_convert(False), 785 } 786 787 QUERY_MODIFIER_PARSERS = { 788 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 789 TokenType.WHERE: lambda self: ("where", self._parse_where()), 790 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 791 TokenType.HAVING: lambda self: ("having", self._parse_having()), 792 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 793 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 794 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 795 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 796 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 797 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 798 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 799 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 800 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 801 TokenType.USING: lambda self: ("sample", 
self._parse_table_sample(as_modifier=True)), 802 TokenType.CLUSTER_BY: lambda self: ( 803 "cluster", 804 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 805 ), 806 TokenType.DISTRIBUTE_BY: lambda self: ( 807 "distribute", 808 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 809 ), 810 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 811 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 812 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 813 } 814 815 SET_PARSERS = { 816 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 817 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 818 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 819 "TRANSACTION": lambda self: self._parse_set_transaction(), 820 } 821 822 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 823 824 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 825 826 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 827 828 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 829 830 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 831 832 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 833 TRANSACTION_CHARACTERISTICS = { 834 "ISOLATION LEVEL REPEATABLE READ", 835 "ISOLATION LEVEL READ COMMITTED", 836 "ISOLATION LEVEL READ UNCOMMITTED", 837 "ISOLATION LEVEL SERIALIZABLE", 838 "READ WRITE", 839 "READ ONLY", 840 } 841 842 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 843 844 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 845 846 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 847 848 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 849 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 850 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 851 852 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, 
    DISTINCT_TOKENS = {TokenType.DISTINCT}

    # Whether CAST raises on failure (vs. TRY_CAST / SAFE_CAST semantics).
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(a, b) means log base a of b (True) or log base b of a (False).
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            ParseError: If none of the candidate types could be parsed; the individual
                failures are merged into the raised error.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we were attempting so callers can tell
                # which candidate failed.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the token stream on top-level semicolons and runs `parse_method`
        once per statement chunk, collecting one tree per chunk.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Either attach the explicitly supplied comments, or transfer any comments
        # hanging off the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves pending comments from the last consumed token onto `expression`.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the raw SQL text between two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward (or backward for negative `times`), refreshing
        # the _curr/_next/_prev token views and pending comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds the cursor to an absolute index (no-op if already there).
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the remainder of the statement as an opaque Command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses `COMMENT [IF EXISTS] ON <kind> <object> IS '<text>'`."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause (expressions, WHERE, GROUP BY, SET)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: dispatches on the current token to a statement parser."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a recognized statement: try a bare expression, else a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> ... ; falls back to Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches `IF [NOT] EXISTS`; returns truthy only if the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parses a CREATE statement (table, view, index, function, procedure, ...),
        collecting properties from every position a dialect may place them.
        Falls back to an opaque Command when the created object kind is unknown.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/view property, trying registered parsers first."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None
t.Optional[exp.Expression]: 1389 if self._match_texts(self.PROPERTY_PARSERS): 1390 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1391 1392 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1393 return self._parse_character_set(default=True) 1394 1395 if self._match_text_seq("COMPOUND", "SORTKEY"): 1396 return self._parse_sortkey(compound=True) 1397 1398 if self._match_text_seq("SQL", "SECURITY"): 1399 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1400 1401 assignment = self._match_pair( 1402 TokenType.VAR, TokenType.EQ, advance=False 1403 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1404 1405 if assignment: 1406 key = self._parse_var_or_string() 1407 self._match(TokenType.EQ) 1408 return self.expression( 1409 exp.Property, 1410 this=key, 1411 value=self._parse_column() or self._parse_var(any_token=True), 1412 ) 1413 1414 return None 1415 1416 def _parse_stored(self) -> exp.FileFormatProperty: 1417 self._match(TokenType.ALIAS) 1418 1419 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1420 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1421 1422 return self.expression( 1423 exp.FileFormatProperty, 1424 this=self.expression( 1425 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1426 ) 1427 if input_format or output_format 1428 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1429 ) 1430 1431 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1432 self._match(TokenType.EQ) 1433 self._match(TokenType.ALIAS) 1434 return self.expression(exp_class, this=self._parse_field()) 1435 1436 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1437 properties = [] 1438 while True: 1439 if before: 1440 prop = self._parse_property_before() 1441 else: 1442 prop = self._parse_property() 1443 1444 if 
not prop: 1445 break 1446 for p in ensure_list(prop): 1447 properties.append(p) 1448 1449 if properties: 1450 return self.expression(exp.Properties, expressions=properties) 1451 1452 return None 1453 1454 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1455 return self.expression( 1456 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1457 ) 1458 1459 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1460 if self._index >= 2: 1461 pre_volatile_token = self._tokens[self._index - 2] 1462 else: 1463 pre_volatile_token = None 1464 1465 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1466 return exp.VolatileProperty() 1467 1468 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1469 1470 def _parse_with_property( 1471 self, 1472 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1473 if self._match(TokenType.L_PAREN, advance=False): 1474 return self._parse_wrapped_csv(self._parse_property) 1475 1476 if self._match_text_seq("JOURNAL"): 1477 return self._parse_withjournaltable() 1478 1479 if self._match_text_seq("DATA"): 1480 return self._parse_withdata(no=False) 1481 elif self._match_text_seq("NO", "DATA"): 1482 return self._parse_withdata(no=True) 1483 1484 if not self._next: 1485 return None 1486 1487 return self._parse_withisolatedloading() 1488 1489 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1490 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1491 self._match(TokenType.EQ) 1492 1493 user = self._parse_id_var() 1494 self._match(TokenType.PARAMETER) 1495 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1496 1497 if not user or not host: 1498 return None 1499 1500 return exp.DefinerProperty(this=f"{user}@{host}") 1501 1502 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1503 self._match(TokenType.TABLE) 1504 self._match(TokenType.EQ) 1505 
return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1506 1507 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1508 return self.expression(exp.LogProperty, no=no) 1509 1510 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1511 return self.expression(exp.JournalProperty, **kwargs) 1512 1513 def _parse_checksum(self) -> exp.ChecksumProperty: 1514 self._match(TokenType.EQ) 1515 1516 on = None 1517 if self._match(TokenType.ON): 1518 on = True 1519 elif self._match_text_seq("OFF"): 1520 on = False 1521 1522 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1523 1524 def _parse_cluster(self) -> exp.Cluster: 1525 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1526 1527 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1528 self._match_text_seq("BY") 1529 1530 self._match_l_paren() 1531 expressions = self._parse_csv(self._parse_column) 1532 self._match_r_paren() 1533 1534 if self._match_text_seq("SORTED", "BY"): 1535 self._match_l_paren() 1536 sorted_by = self._parse_csv(self._parse_ordered) 1537 self._match_r_paren() 1538 else: 1539 sorted_by = None 1540 1541 self._match(TokenType.INTO) 1542 buckets = self._parse_number() 1543 self._match_text_seq("BUCKETS") 1544 1545 return self.expression( 1546 exp.ClusteredByProperty, 1547 expressions=expressions, 1548 sorted_by=sorted_by, 1549 buckets=buckets, 1550 ) 1551 1552 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1553 if not self._match_text_seq("GRANTS"): 1554 self._retreat(self._index - 1) 1555 return None 1556 1557 return self.expression(exp.CopyGrantsProperty) 1558 1559 def _parse_freespace(self) -> exp.FreespaceProperty: 1560 self._match(TokenType.EQ) 1561 return self.expression( 1562 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1563 ) 1564 1565 def _parse_mergeblockratio( 1566 self, no: bool = False, default: bool 
= False 1567 ) -> exp.MergeBlockRatioProperty: 1568 if self._match(TokenType.EQ): 1569 return self.expression( 1570 exp.MergeBlockRatioProperty, 1571 this=self._parse_number(), 1572 percent=self._match(TokenType.PERCENT), 1573 ) 1574 1575 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1576 1577 def _parse_datablocksize( 1578 self, 1579 default: t.Optional[bool] = None, 1580 minimum: t.Optional[bool] = None, 1581 maximum: t.Optional[bool] = None, 1582 ) -> exp.DataBlocksizeProperty: 1583 self._match(TokenType.EQ) 1584 size = self._parse_number() 1585 1586 units = None 1587 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1588 units = self._prev.text 1589 1590 return self.expression( 1591 exp.DataBlocksizeProperty, 1592 size=size, 1593 units=units, 1594 default=default, 1595 minimum=minimum, 1596 maximum=maximum, 1597 ) 1598 1599 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1600 self._match(TokenType.EQ) 1601 always = self._match_text_seq("ALWAYS") 1602 manual = self._match_text_seq("MANUAL") 1603 never = self._match_text_seq("NEVER") 1604 default = self._match_text_seq("DEFAULT") 1605 1606 autotemp = None 1607 if self._match_text_seq("AUTOTEMP"): 1608 autotemp = self._parse_schema() 1609 1610 return self.expression( 1611 exp.BlockCompressionProperty, 1612 always=always, 1613 manual=manual, 1614 never=never, 1615 default=default, 1616 autotemp=autotemp, 1617 ) 1618 1619 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1620 no = self._match_text_seq("NO") 1621 concurrent = self._match_text_seq("CONCURRENT") 1622 self._match_text_seq("ISOLATED", "LOADING") 1623 for_all = self._match_text_seq("FOR", "ALL") 1624 for_insert = self._match_text_seq("FOR", "INSERT") 1625 for_none = self._match_text_seq("FOR", "NONE") 1626 return self.expression( 1627 exp.IsolatedLoadingProperty, 1628 no=no, 1629 concurrent=concurrent, 1630 for_all=for_all, 1631 for_insert=for_insert, 1632 for_none=for_none, 1633 ) 
1634 1635 def _parse_locking(self) -> exp.LockingProperty: 1636 if self._match(TokenType.TABLE): 1637 kind = "TABLE" 1638 elif self._match(TokenType.VIEW): 1639 kind = "VIEW" 1640 elif self._match(TokenType.ROW): 1641 kind = "ROW" 1642 elif self._match_text_seq("DATABASE"): 1643 kind = "DATABASE" 1644 else: 1645 kind = None 1646 1647 if kind in ("DATABASE", "TABLE", "VIEW"): 1648 this = self._parse_table_parts() 1649 else: 1650 this = None 1651 1652 if self._match(TokenType.FOR): 1653 for_or_in = "FOR" 1654 elif self._match(TokenType.IN): 1655 for_or_in = "IN" 1656 else: 1657 for_or_in = None 1658 1659 if self._match_text_seq("ACCESS"): 1660 lock_type = "ACCESS" 1661 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1662 lock_type = "EXCLUSIVE" 1663 elif self._match_text_seq("SHARE"): 1664 lock_type = "SHARE" 1665 elif self._match_text_seq("READ"): 1666 lock_type = "READ" 1667 elif self._match_text_seq("WRITE"): 1668 lock_type = "WRITE" 1669 elif self._match_text_seq("CHECKSUM"): 1670 lock_type = "CHECKSUM" 1671 else: 1672 lock_type = None 1673 1674 override = self._match_text_seq("OVERRIDE") 1675 1676 return self.expression( 1677 exp.LockingProperty, 1678 this=this, 1679 kind=kind, 1680 for_or_in=for_or_in, 1681 lock_type=lock_type, 1682 override=override, 1683 ) 1684 1685 def _parse_partition_by(self) -> t.List[exp.Expression]: 1686 if self._match(TokenType.PARTITION_BY): 1687 return self._parse_csv(self._parse_conjunction) 1688 return [] 1689 1690 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1691 self._match(TokenType.EQ) 1692 return self.expression( 1693 exp.PartitionedByProperty, 1694 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1695 ) 1696 1697 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1698 if self._match_text_seq("AND", "STATISTICS"): 1699 statistics = True 1700 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1701 statistics = False 1702 else: 1703 statistics = None 1704 1705 return 
self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1706 1707 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1708 if self._match_text_seq("PRIMARY", "INDEX"): 1709 return exp.NoPrimaryIndexProperty() 1710 return None 1711 1712 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1713 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1714 return exp.OnCommitProperty() 1715 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1716 return exp.OnCommitProperty(delete=True) 1717 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1718 1719 def _parse_distkey(self) -> exp.DistKeyProperty: 1720 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1721 1722 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1723 table = self._parse_table(schema=True) 1724 1725 options = [] 1726 while self._match_texts(("INCLUDING", "EXCLUDING")): 1727 this = self._prev.text.upper() 1728 1729 id_var = self._parse_id_var() 1730 if not id_var: 1731 return None 1732 1733 options.append( 1734 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1735 ) 1736 1737 return self.expression(exp.LikeProperty, this=table, expressions=options) 1738 1739 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1740 return self.expression( 1741 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1742 ) 1743 1744 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1745 self._match(TokenType.EQ) 1746 return self.expression( 1747 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1748 ) 1749 1750 def _parse_returns(self) -> exp.ReturnsProperty: 1751 value: t.Optional[exp.Expression] 1752 is_table = self._match(TokenType.TABLE) 1753 1754 if is_table: 1755 if self._match(TokenType.LT): 1756 value = self.expression( 1757 exp.Schema, 1758 this="TABLE", 1759 
                    # NOTE(review): tail of _parse_returns — its `def` begins before this chunk.
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement into an exp.Describe node."""
        # An optional leading CREATABLE keyword (e.g. TABLE, VIEW) becomes the "kind".
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (the INSERT token was already consumed)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY '<path>' writes to a filesystem directory, not a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. INSERT OR REPLACE INTO — the keyword after OR is the "alternative".
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear before or after the source expression; keep whichever hit.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint, otherwise a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING <exprs> [INTO <target>] clause, or None if absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... when the ROW token was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT SERDE / DELIMITED specification.

        Args:
            match_row: when True, require a leading ROW FORMAT token pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Every DELIMITED option is optional and independent; ESCAPED BY is only
        # legal directly after FIELDS TERMINATED BY.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE; any other LOAD falls back to a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement into an exp.Delete node."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        # If the next token is not FROM, a multi-table target list precedes it.
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE; keep whichever hit.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement into an exp.Update node."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                # RETURNING may appear before or after WHERE; keep whichever hit.
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single ('key' = 'value') pair is consumed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr>, ...), or return None if absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single VALUES row into an exp.Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH, SELECT, parenthesized subquery, VALUES,
        or a leading FROM (duckdb)."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style)
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            # Bare "FROM x" becomes "SELECT * FROM x".
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause (list of CTEs), or return None if absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
expressions.append(self._parse_cte()) 2126 2127 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2128 break 2129 else: 2130 self._match(TokenType.WITH) 2131 2132 return self.expression( 2133 exp.With, comments=comments, expressions=expressions, recursive=recursive 2134 ) 2135 2136 def _parse_cte(self) -> exp.CTE: 2137 alias = self._parse_table_alias() 2138 if not alias or not alias.this: 2139 self.raise_error("Expected CTE to have alias") 2140 2141 self._match(TokenType.ALIAS) 2142 return self.expression( 2143 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2144 ) 2145 2146 def _parse_table_alias( 2147 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2148 ) -> t.Optional[exp.TableAlias]: 2149 any_token = self._match(TokenType.ALIAS) 2150 alias = ( 2151 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2152 or self._parse_string_as_identifier() 2153 ) 2154 2155 index = self._index 2156 if self._match(TokenType.L_PAREN): 2157 columns = self._parse_csv(self._parse_function_parameter) 2158 self._match_r_paren() if columns else self._retreat(index) 2159 else: 2160 columns = None 2161 2162 if not alias and not columns: 2163 return None 2164 2165 return self.expression(exp.TableAlias, this=alias, columns=columns) 2166 2167 def _parse_subquery( 2168 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2169 ) -> t.Optional[exp.Subquery]: 2170 if not this: 2171 return None 2172 2173 return self.expression( 2174 exp.Subquery, 2175 this=this, 2176 pivots=self._parse_pivots(), 2177 alias=self._parse_table_alias() if parse_alias else None, 2178 ) 2179 2180 def _parse_query_modifiers( 2181 self, this: t.Optional[exp.Expression] 2182 ) -> t.Optional[exp.Expression]: 2183 if isinstance(this, self.MODIFIABLES): 2184 for join in iter(self._parse_join, None): 2185 this.append("joins", join) 2186 for lateral in iter(self._parse_lateral, None): 2187 this.append("laterals", 
lateral) 2188 2189 while True: 2190 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2191 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2192 key, expression = parser(self) 2193 2194 if expression: 2195 this.set(key, expression) 2196 if key == "limit": 2197 offset = expression.args.pop("offset", None) 2198 if offset: 2199 this.set("offset", exp.Offset(expression=offset)) 2200 continue 2201 break 2202 return this 2203 2204 def _parse_hint(self) -> t.Optional[exp.Hint]: 2205 if self._match(TokenType.HINT): 2206 hints = [] 2207 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2208 hints.extend(hint) 2209 2210 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2211 self.raise_error("Expected */ after HINT") 2212 2213 return self.expression(exp.Hint, expressions=hints) 2214 2215 return None 2216 2217 def _parse_into(self) -> t.Optional[exp.Into]: 2218 if not self._match(TokenType.INTO): 2219 return None 2220 2221 temp = self._match(TokenType.TEMPORARY) 2222 unlogged = self._match_text_seq("UNLOGGED") 2223 self._match(TokenType.TABLE) 2224 2225 return self.expression( 2226 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2227 ) 2228 2229 def _parse_from( 2230 self, joins: bool = False, skip_from_token: bool = False 2231 ) -> t.Optional[exp.From]: 2232 if not skip_from_token and not self._match(TokenType.FROM): 2233 return None 2234 2235 return self.expression( 2236 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2237 ) 2238 2239 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2240 if not self._match(TokenType.MATCH_RECOGNIZE): 2241 return None 2242 2243 self._match_l_paren() 2244 2245 partition = self._parse_partition_by() 2246 order = self._parse_order() 2247 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2248 2249 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2250 rows = exp.var("ONE ROW PER 
MATCH") 2251 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2252 text = "ALL ROWS PER MATCH" 2253 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2254 text += f" SHOW EMPTY MATCHES" 2255 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2256 text += f" OMIT EMPTY MATCHES" 2257 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2258 text += f" WITH UNMATCHED ROWS" 2259 rows = exp.var(text) 2260 else: 2261 rows = None 2262 2263 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2264 text = "AFTER MATCH SKIP" 2265 if self._match_text_seq("PAST", "LAST", "ROW"): 2266 text += f" PAST LAST ROW" 2267 elif self._match_text_seq("TO", "NEXT", "ROW"): 2268 text += f" TO NEXT ROW" 2269 elif self._match_text_seq("TO", "FIRST"): 2270 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2271 elif self._match_text_seq("TO", "LAST"): 2272 text += f" TO LAST {self._advance_any().text}" # type: ignore 2273 after = exp.var(text) 2274 else: 2275 after = None 2276 2277 if self._match_text_seq("PATTERN"): 2278 self._match_l_paren() 2279 2280 if not self._curr: 2281 self.raise_error("Expecting )", self._curr) 2282 2283 paren = 1 2284 start = self._curr 2285 2286 while self._curr and paren > 0: 2287 if self._curr.token_type == TokenType.L_PAREN: 2288 paren += 1 2289 if self._curr.token_type == TokenType.R_PAREN: 2290 paren -= 1 2291 2292 end = self._prev 2293 self._advance() 2294 2295 if paren > 0: 2296 self.raise_error("Expecting )", self._curr) 2297 2298 pattern = exp.var(self._find_sql(start, end)) 2299 else: 2300 pattern = None 2301 2302 define = ( 2303 self._parse_csv( 2304 lambda: self.expression( 2305 exp.Alias, 2306 alias=self._parse_id_var(any_token=True), 2307 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2308 ) 2309 ) 2310 if self._match_text_seq("DEFINE") 2311 else None 2312 ) 2313 2314 self._match_r_paren() 2315 2316 return self.expression( 2317 exp.MatchRecognize, 2318 partition_by=partition, 2319 order=order, 2320 
measures=measures, 2321 rows=rows, 2322 after=after, 2323 pattern=pattern, 2324 define=define, 2325 alias=self._parse_table_alias(), 2326 ) 2327 2328 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2329 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2330 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2331 2332 if outer_apply or cross_apply: 2333 this = self._parse_select(table=True) 2334 view = None 2335 outer = not cross_apply 2336 elif self._match(TokenType.LATERAL): 2337 this = self._parse_select(table=True) 2338 view = self._match(TokenType.VIEW) 2339 outer = self._match(TokenType.OUTER) 2340 else: 2341 return None 2342 2343 if not this: 2344 this = ( 2345 self._parse_unnest() 2346 or self._parse_function() 2347 or self._parse_id_var(any_token=False) 2348 ) 2349 2350 while self._match(TokenType.DOT): 2351 this = exp.Dot( 2352 this=this, 2353 expression=self._parse_function() or self._parse_id_var(any_token=False), 2354 ) 2355 2356 if view: 2357 table = self._parse_id_var(any_token=False) 2358 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2359 table_alias: t.Optional[exp.TableAlias] = self.expression( 2360 exp.TableAlias, this=table, columns=columns 2361 ) 2362 elif isinstance(this, exp.Subquery) and this.alias: 2363 # Ensures parity between the Subquery's and the Lateral's "alias" args 2364 table_alias = this.args["alias"].copy() 2365 else: 2366 table_alias = self._parse_table_alias() 2367 2368 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2369 2370 def _parse_join_parts( 2371 self, 2372 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2373 return ( 2374 self._match_set(self.JOIN_METHODS) and self._prev, 2375 self._match_set(self.JOIN_SIDES) and self._prev, 2376 self._match_set(self.JOIN_KINDS) and self._prev, 2377 ) 2378 2379 def _parse_join( 2380 self, skip_join_token: bool = False, parse_bracket: bool = False 2381 
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        Args:
            index: a pre-parsed index identifier; when given, parse the
                "ON [TABLE] <table>" form instead of "[UNIQUE] INDEX <name>".
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (catalog, db, or table)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous db becomes catalog, previous table becomes db.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like expression: lateral, unnest, VALUES, subquery,
        or a (possibly aliased/sampled/pivoted) table reference."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect flag: some dialects put TABLESAMPLE before the alias, some after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR TIMESTAMP/VERSION ...)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET ...]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects the single alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES list, optionally parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE (...) or, as a modifier, USING SAMPLE (duckdb)."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            # Hive: BUCKET x OUT OF y [ON <field>]
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            # (<method> [, <seed>]) trailing group
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse zero or more consecutive join clauses."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse duckdb's simplified PIVOT syntax (ON/USING/GROUP BY)."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT (...) clause, or return None."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause — rewind the PIVOT/UNPIVOT tokens.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the name each pivot aggregation contributes to output columns."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with any mix of expressions, GROUPING SETS, ROLLUP,
        CUBE and WITH TOTALS, or return None if absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop so that expressions and modifiers can interleave; stop once a pass
        # finds no grouping modifier.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; ROLLUP (...) / CUBE (...) store columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...), or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse a single grouping set: a parenthesized column tuple or one column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY ... hierarchies."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # Temporarily register PRIOR as a no-paren function while the CONNECT BY
        # condition is parsed, then remove it again.
        # NOTE(review): NO_PAREN_FUNCTION_PARSERS appears to be shared (class-level)
        # state, so this mutation is not safe for concurrent parsers — confirm.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; return `this` unchanged if there is no ORDER BY."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort-like clause (e.g. SORT BY / CLUSTER BY) into exp_class."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST handling."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering isn't explicit, derive it from the dialect's
        # NULL_ORDERING setting so generated SQL stays semantically equivalent.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT/TOP or FETCH clause.

        NOTE(review): this method is cut off by the end of this chunk; the final
        statement below is truncated in the visible source.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may parenthesize its count: TOP (n)
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return  # NOTE(review): chunk ends mid-statement here; original continues past this line.
this 3009 3010 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3011 if not self._match(TokenType.OFFSET): 3012 return this 3013 3014 count = self._parse_term() 3015 self._match_set((TokenType.ROW, TokenType.ROWS)) 3016 return self.expression(exp.Offset, this=this, expression=count) 3017 3018 def _parse_locks(self) -> t.List[exp.Lock]: 3019 locks = [] 3020 while True: 3021 if self._match_text_seq("FOR", "UPDATE"): 3022 update = True 3023 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3024 "LOCK", "IN", "SHARE", "MODE" 3025 ): 3026 update = False 3027 else: 3028 break 3029 3030 expressions = None 3031 if self._match_text_seq("OF"): 3032 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3033 3034 wait: t.Optional[bool | exp.Expression] = None 3035 if self._match_text_seq("NOWAIT"): 3036 wait = True 3037 elif self._match_text_seq("WAIT"): 3038 wait = self._parse_primary() 3039 elif self._match_text_seq("SKIP", "LOCKED"): 3040 wait = False 3041 3042 locks.append( 3043 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3044 ) 3045 3046 return locks 3047 3048 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3049 if not self._match_set(self.SET_OPERATIONS): 3050 return this 3051 3052 token_type = self._prev.token_type 3053 3054 if token_type == TokenType.UNION: 3055 expression = exp.Union 3056 elif token_type == TokenType.EXCEPT: 3057 expression = exp.Except 3058 else: 3059 expression = exp.Intersect 3060 3061 return self.expression( 3062 expression, 3063 this=this, 3064 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3065 by_name=self._match_text_seq("BY", "NAME"), 3066 expression=self._parse_set_operations(self._parse_select(nested=True)), 3067 ) 3068 3069 def _parse_expression(self) -> t.Optional[exp.Expression]: 3070 return self._parse_alias(self._parse_conjunction()) 3071 3072 def 
_parse_conjunction(self) -> t.Optional[exp.Expression]: 3073 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3074 3075 def _parse_equality(self) -> t.Optional[exp.Expression]: 3076 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3077 3078 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3079 return self._parse_tokens(self._parse_range, self.COMPARISON) 3080 3081 def _parse_range(self) -> t.Optional[exp.Expression]: 3082 this = self._parse_bitwise() 3083 negate = self._match(TokenType.NOT) 3084 3085 if self._match_set(self.RANGE_PARSERS): 3086 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3087 if not expression: 3088 return this 3089 3090 this = expression 3091 elif self._match(TokenType.ISNULL): 3092 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3093 3094 # Postgres supports ISNULL and NOTNULL for conditions. 3095 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3096 if self._match(TokenType.NOTNULL): 3097 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3098 this = self.expression(exp.Not, this=this) 3099 3100 if negate: 3101 this = self.expression(exp.Not, this=this) 3102 3103 if self._match(TokenType.IS): 3104 this = self._parse_is(this) 3105 3106 return this 3107 3108 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3109 index = self._index - 1 3110 negate = self._match(TokenType.NOT) 3111 3112 if self._match_text_seq("DISTINCT", "FROM"): 3113 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3114 return self.expression(klass, this=this, expression=self._parse_expression()) 3115 3116 expression = self._parse_null() or self._parse_boolean() 3117 if not expression: 3118 self._retreat(index) 3119 return None 3120 3121 this = self.expression(exp.Is, this=this, expression=expression) 3122 return self.expression(exp.Not, this=this) if negate else this 3123 3124 def _parse_in(self, this: t.Optional[exp.Expression], 
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate (IN token already consumed).

        Supports IN UNNEST(...), IN (subquery), IN (expr, ...) and the
        unparenthesized single-field form.
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery becomes the `query` arg, otherwise a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `low AND high` of a BETWEEN predicate (BETWEEN already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used with LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing to INTERVAL '<n>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            # INTERVAL was not followed by a value (e.g. it's an identifier) - rewind
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, `??`, and shift operators."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` null-coalescing operator
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-precedence operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-precedence operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, then fall through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals and `TYPE 'literal'` style casts, else a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' - dialect-specific literal parsers first
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was a misparse - rewind
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` in DECIMAL(10) or `10 CHAR`."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), parameterized,
        timezone-qualified and INTERVAL span types.

        Args:
            check_func: when True, back off if the "type" is followed by a string
                literal (it was likely a function call instead).
            schema: propagated to nested type parses (column-def context).
            allow_identifiers: permit identifiers that re-tokenize to type keywords.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Maybe an identifier that is really a type keyword (e.g. quoted), or a UDT
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Parenthesized args didn't parse as type params - not a type
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call; see check_func handling below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT>, STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL YEAR TO MONTH style span
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal: treat TYPE(...) as a function call
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] suffixes make array types, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name [:] type ...` as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, wrapping bare identifiers in exp.Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
self.raise_error("Expected type") 3424 elif op and self._curr: 3425 self._advance() 3426 value = self._prev.text 3427 field = ( 3428 exp.Literal.number(value) 3429 if self._prev.token_type == TokenType.NUMBER 3430 else exp.Literal.string(value) 3431 ) 3432 else: 3433 field = self._parse_field(anonymous_func=True, any_token=True) 3434 3435 if isinstance(field, exp.Func): 3436 # bigquery allows function calls like x.y.count(...) 3437 # SAFE.SUBSTR(...) 3438 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3439 this = self._replace_columns_with_dots(this) 3440 3441 if op: 3442 this = op(self, this, field) 3443 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3444 this = self.expression( 3445 exp.Column, 3446 this=field, 3447 table=this.this, 3448 db=this.args.get("table"), 3449 catalog=this.args.get("db"), 3450 ) 3451 else: 3452 this = self.expression(exp.Dot, this=this, expression=field) 3453 this = self._parse_bracket(this) 3454 return this 3455 3456 def _parse_primary(self) -> t.Optional[exp.Expression]: 3457 if self._match_set(self.PRIMARY_PARSERS): 3458 token_type = self._prev.token_type 3459 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3460 3461 if token_type == TokenType.STRING: 3462 expressions = [primary] 3463 while self._match(TokenType.STRING): 3464 expressions.append(exp.Literal.string(self._prev.text)) 3465 3466 if len(expressions) > 1: 3467 return self.expression(exp.Concat, expressions=expressions) 3468 3469 return primary 3470 3471 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3472 return exp.Literal.number(f"0.{self._prev.text}") 3473 3474 if self._match(TokenType.L_PAREN): 3475 comments = self._prev_comments 3476 query = self._parse_select() 3477 3478 if query: 3479 expressions = [query] 3480 else: 3481 expressions = self._parse_expressions() 3482 3483 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3484 3485 if isinstance(this, 
exp.Subqueryable): 3486 this = self._parse_set_operations( 3487 self._parse_subquery(this=this, parse_alias=False) 3488 ) 3489 elif len(expressions) > 1: 3490 this = self.expression(exp.Tuple, expressions=expressions) 3491 else: 3492 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3493 3494 if this: 3495 this.add_comments(comments) 3496 3497 self._match_r_paren(expression=this) 3498 return this 3499 3500 return None 3501 3502 def _parse_field( 3503 self, 3504 any_token: bool = False, 3505 tokens: t.Optional[t.Collection[TokenType]] = None, 3506 anonymous_func: bool = False, 3507 ) -> t.Optional[exp.Expression]: 3508 return ( 3509 self._parse_primary() 3510 or self._parse_function(anonymous=anonymous_func) 3511 or self._parse_id_var(any_token=any_token, tokens=tokens) 3512 ) 3513 3514 def _parse_function( 3515 self, 3516 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3517 anonymous: bool = False, 3518 optional_parens: bool = True, 3519 ) -> t.Optional[exp.Expression]: 3520 if not self._curr: 3521 return None 3522 3523 token_type = self._curr.token_type 3524 this = self._curr.text 3525 upper = this.upper() 3526 3527 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3528 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3529 self._advance() 3530 return parser(self) 3531 3532 if not self._next or self._next.token_type != TokenType.L_PAREN: 3533 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3534 self._advance() 3535 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3536 3537 return None 3538 3539 if token_type not in self.FUNC_TOKENS: 3540 return None 3541 3542 self._advance(2) 3543 3544 parser = self.FUNCTION_PARSERS.get(upper) 3545 if parser and not anonymous: 3546 this = parser(self) 3547 else: 3548 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3549 3550 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3551 this = 
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: no-paren parser table, no-paren builtin functions,
        dialect FUNCTION_PARSERS, subquery predicates (EXISTS/ANY/...), then the
        general FUNCTIONS table; unknown names become exp.Anonymous.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original casing of the function name
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`), else a DISTINCT list or expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda - rewind and parse as a regular argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`.

        First speculatively tries to parse a nested SELECT; if one parses, the
        parens belong to a subquery, not a schema.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field as a column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, optional constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # `name AS <expr>` computed column (no explicit type)
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
self.expression(exp.ColumnDef, this=this, ordinality=True) 3673 3674 constraints: t.List[exp.Expression] = [] 3675 3676 if not kind and self._match(TokenType.ALIAS): 3677 constraints.append( 3678 self.expression( 3679 exp.ComputedColumnConstraint, 3680 this=self._parse_conjunction(), 3681 persisted=self._match_text_seq("PERSISTED"), 3682 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3683 ) 3684 ) 3685 3686 while True: 3687 constraint = self._parse_column_constraint() 3688 if not constraint: 3689 break 3690 constraints.append(constraint) 3691 3692 if not kind and not constraints: 3693 return this 3694 3695 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3696 3697 def _parse_auto_increment( 3698 self, 3699 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3700 start = None 3701 increment = None 3702 3703 if self._match(TokenType.L_PAREN, advance=False): 3704 args = self._parse_wrapped_csv(self._parse_bitwise) 3705 start = seq_get(args, 0) 3706 increment = seq_get(args, 1) 3707 elif self._match_text_seq("START"): 3708 start = self._parse_bitwise() 3709 self._match_text_seq("INCREMENT") 3710 increment = self._parse_bitwise() 3711 3712 if start and increment: 3713 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3714 3715 return exp.AutoIncrementColumnConstraint() 3716 3717 def _parse_compress(self) -> exp.CompressColumnConstraint: 3718 if self._match(TokenType.L_PAREN, advance=False): 3719 return self.expression( 3720 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3721 ) 3722 3723 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3724 3725 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3726 if self._match_text_seq("BY", "DEFAULT"): 3727 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3728 this = self.expression( 3729 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3730 ) 3731 else: 3732 self._match_text_seq("ALWAYS") 3733 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3734 3735 self._match(TokenType.ALIAS) 3736 identity = self._match_text_seq("IDENTITY") 3737 3738 if self._match(TokenType.L_PAREN): 3739 if self._match(TokenType.START_WITH): 3740 this.set("start", self._parse_bitwise()) 3741 if self._match_text_seq("INCREMENT", "BY"): 3742 this.set("increment", self._parse_bitwise()) 3743 if self._match_text_seq("MINVALUE"): 3744 this.set("minvalue", self._parse_bitwise()) 3745 if self._match_text_seq("MAXVALUE"): 3746 this.set("maxvalue", self._parse_bitwise()) 3747 3748 if self._match_text_seq("CYCLE"): 3749 this.set("cycle", True) 3750 elif self._match_text_seq("NO", "CYCLE"): 3751 this.set("cycle", False) 3752 3753 if not identity: 3754 this.set("expression", self._parse_bitwise()) 3755 3756 self._match_r_paren() 3757 3758 return this 3759 3760 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3761 self._match_text_seq("LENGTH") 3762 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3763 3764 def _parse_not_constraint( 3765 self, 3766 ) -> t.Optional[exp.Expression]: 3767 if self._match_text_seq("NULL"): 3768 return self.expression(exp.NotNullColumnConstraint) 3769 if self._match_text_seq("CASESPECIFIC"): 3770 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3771 if self._match_text_seq("FOR", "REPLICATION"): 3772 return self.expression(exp.NotForReplicationColumnConstraint) 3773 return None 3774 3775 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3776 if self._match(TokenType.CONSTRAINT): 3777 this = self._parse_id_var() 3778 else: 3779 this = None 3780 3781 if self._match_texts(self.CONSTRAINT_PARSERS): 3782 return self.expression( 3783 exp.ColumnConstraint, 3784 this=this, 3785 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint, named (CONSTRAINT <name> ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` and dispatch to its parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options (ON DELETE/UPDATE actions, MATCH, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE or UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> clause with its constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is never populated here - referenced columns
        # appear to be captured as part of the table schema; confirm against callers.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: columns, REFERENCES, and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one component of a PRIMARY KEY column list."""
        return self._parse_field()
    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a per-column constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts/array literals and `{...}` struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:2]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:upper` slice suffix."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (CASE token already consumed)."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE <expr> WHEN ... form
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both function form IF(c, t, f) and keyword form IF c THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Bare IF that isn't followed by a condition - rewind
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] (NEXT already consumed)."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Just the NEXT keyword - rewind so it can be parsed as an identifier
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )
this=this, expression=self._parse_bitwise()) 4014 4015 if not self._match(TokenType.COMMA): 4016 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4017 4018 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4019 4020 def _parse_any_value(self) -> exp.AnyValue: 4021 this = self._parse_lambda() 4022 is_max = None 4023 having = None 4024 4025 if self._match(TokenType.HAVING): 4026 self._match_texts(("MAX", "MIN")) 4027 is_max = self._prev.text == "MAX" 4028 having = self._parse_column() 4029 4030 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4031 4032 def _parse_cast(self, strict: bool) -> exp.Expression: 4033 this = self._parse_conjunction() 4034 4035 if not self._match(TokenType.ALIAS): 4036 if self._match(TokenType.COMMA): 4037 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4038 4039 self.raise_error("Expected AS after CAST") 4040 4041 fmt = None 4042 to = self._parse_types() 4043 4044 if not to: 4045 self.raise_error("Expected TYPE after CAST") 4046 elif isinstance(to, exp.Identifier): 4047 to = exp.DataType.build(to.name, udt=True) 4048 elif to.this == exp.DataType.Type.CHAR: 4049 if self._match(TokenType.CHARACTER_SET): 4050 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4051 elif self._match(TokenType.FORMAT): 4052 fmt_string = self._parse_string() 4053 fmt = self._parse_at_time_zone(fmt_string) 4054 4055 if to.this in exp.DataType.TEMPORAL_TYPES: 4056 this = self.expression( 4057 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4058 this=this, 4059 format=exp.Literal.string( 4060 format_time( 4061 fmt_string.this if fmt_string else "", 4062 self.FORMAT_MAPPING or self.TIME_MAPPING, 4063 self.FORMAT_TRIE or self.TIME_TRIE, 4064 ) 4065 ), 4066 ) 4067 4068 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4069 this.set("zone", fmt.args["zone"]) 4070 4071 return this 4072 4073 return 
self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4074 4075 def _parse_concat(self) -> t.Optional[exp.Expression]: 4076 args = self._parse_csv(self._parse_conjunction) 4077 if self.CONCAT_NULL_OUTPUTS_STRING: 4078 args = self._ensure_string_if_null(args) 4079 4080 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4081 # we find such a call we replace it with its argument. 4082 if len(args) == 1: 4083 return args[0] 4084 4085 return self.expression( 4086 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4087 ) 4088 4089 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4090 args = self._parse_csv(self._parse_conjunction) 4091 if len(args) < 2: 4092 return self.expression(exp.ConcatWs, expressions=args) 4093 delim, *values = args 4094 if self.CONCAT_NULL_OUTPUTS_STRING: 4095 values = self._ensure_string_if_null(values) 4096 4097 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4098 4099 def _parse_string_agg(self) -> exp.Expression: 4100 if self._match(TokenType.DISTINCT): 4101 args: t.List[t.Optional[exp.Expression]] = [ 4102 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4103 ] 4104 if self._match(TokenType.COMMA): 4105 args.extend(self._parse_csv(self._parse_conjunction)) 4106 else: 4107 args = self._parse_csv(self._parse_conjunction) # type: ignore 4108 4109 index = self._index 4110 if not self._match(TokenType.R_PAREN) and args: 4111 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4112 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4113 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4114 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4115 4116 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
4117 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4118 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4119 if not self._match_text_seq("WITHIN", "GROUP"): 4120 self._retreat(index) 4121 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4122 4123 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4124 order = self._parse_order(this=seq_get(args, 0)) 4125 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4126 4127 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4128 this = self._parse_bitwise() 4129 4130 if self._match(TokenType.USING): 4131 to: t.Optional[exp.Expression] = self.expression( 4132 exp.CharacterSet, this=self._parse_var() 4133 ) 4134 elif self._match(TokenType.COMMA): 4135 to = self._parse_types() 4136 else: 4137 to = None 4138 4139 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4140 4141 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4142 """ 4143 There are generally two variants of the DECODE function: 4144 4145 - DECODE(bin, charset) 4146 - DECODE(expression, search, result [, search, result] ... [, default]) 4147 4148 The second variant will always be parsed into a CASE expression. Note that NULL 4149 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4150 instead of relying on pattern matching. 
4151 """ 4152 args = self._parse_csv(self._parse_conjunction) 4153 4154 if len(args) < 3: 4155 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4156 4157 expression, *expressions = args 4158 if not expression: 4159 return None 4160 4161 ifs = [] 4162 for search, result in zip(expressions[::2], expressions[1::2]): 4163 if not search or not result: 4164 return None 4165 4166 if isinstance(search, exp.Literal): 4167 ifs.append( 4168 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4169 ) 4170 elif isinstance(search, exp.Null): 4171 ifs.append( 4172 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4173 ) 4174 else: 4175 cond = exp.or_( 4176 exp.EQ(this=expression.copy(), expression=search), 4177 exp.and_( 4178 exp.Is(this=expression.copy(), expression=exp.Null()), 4179 exp.Is(this=search.copy(), expression=exp.Null()), 4180 copy=False, 4181 ), 4182 copy=False, 4183 ) 4184 ifs.append(exp.If(this=cond, true=result)) 4185 4186 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4187 4188 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4189 self._match_text_seq("KEY") 4190 key = self._parse_column() 4191 self._match_set((TokenType.COLON, TokenType.COMMA)) 4192 self._match_text_seq("VALUE") 4193 value = self._parse_bitwise() 4194 4195 if not key and not value: 4196 return None 4197 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4198 4199 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4200 if not this or not self._match_text_seq("FORMAT", "JSON"): 4201 return this 4202 4203 return self.expression(exp.FormatJson, this=this) 4204 4205 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4206 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4207 for value in values: 4208 if self._match_text_seq(value, "ON", on): 4209 return f"{value} ON {on}" 4210 4211 return None 4212 4213 def _parse_json_object(self) -> exp.JSONObject: 4214 star = self._parse_star() 4215 expressions = ( 4216 [star] 4217 if star 4218 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4219 ) 4220 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4221 4222 unique_keys = None 4223 if self._match_text_seq("WITH", "UNIQUE"): 4224 unique_keys = True 4225 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4226 unique_keys = False 4227 4228 self._match_text_seq("KEYS") 4229 4230 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4231 self._parse_type() 4232 ) 4233 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4234 4235 return self.expression( 4236 exp.JSONObject, 4237 expressions=expressions, 4238 null_handling=null_handling, 4239 unique_keys=unique_keys, 4240 return_type=return_type, 4241 encoding=encoding, 4242 ) 4243 4244 def _parse_logarithm(self) -> exp.Func: 4245 # Default argument order is base, expression 4246 args = self._parse_csv(self._parse_range) 4247 4248 if len(args) > 1: 4249 if not self.LOG_BASE_FIRST: 4250 args.reverse() 4251 return exp.Log.from_arg_list(args) 4252 4253 return self.expression( 4254 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4255 ) 4256 4257 def _parse_match_against(self) -> exp.MatchAgainst: 4258 expressions = self._parse_csv(self._parse_column) 4259 4260 self._match_text_seq(")", "AGAINST", "(") 4261 4262 this = self._parse_string() 4263 4264 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4265 modifier = "IN NATURAL LANGUAGE MODE" 4266 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4267 modifier = f"{modifier} WITH QUERY EXPANSION" 4268 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4269 modifier = "IN BOOLEAN MODE" 4270 elif 
self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4271 modifier = "WITH QUERY EXPANSION" 4272 else: 4273 modifier = None 4274 4275 return self.expression( 4276 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4277 ) 4278 4279 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4280 def _parse_open_json(self) -> exp.OpenJSON: 4281 this = self._parse_bitwise() 4282 path = self._match(TokenType.COMMA) and self._parse_string() 4283 4284 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4285 this = self._parse_field(any_token=True) 4286 kind = self._parse_types() 4287 path = self._parse_string() 4288 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4289 4290 return self.expression( 4291 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4292 ) 4293 4294 expressions = None 4295 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4296 self._match_l_paren() 4297 expressions = self._parse_csv(_parse_open_json_column_def) 4298 4299 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4300 4301 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4302 args = self._parse_csv(self._parse_bitwise) 4303 4304 if self._match(TokenType.IN): 4305 return self.expression( 4306 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4307 ) 4308 4309 if haystack_first: 4310 haystack = seq_get(args, 0) 4311 needle = seq_get(args, 1) 4312 else: 4313 needle = seq_get(args, 0) 4314 haystack = seq_get(args, 1) 4315 4316 return self.expression( 4317 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4318 ) 4319 4320 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4321 args = self._parse_csv(self._parse_table) 4322 return exp.JoinHint(this=func_name.upper(), expressions=args) 4323 4324 def _parse_substring(self) -> exp.Substring: 4325 # Postgres supports the form: 
substring(string [from int] [for int]) 4326 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4327 4328 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4329 4330 if self._match(TokenType.FROM): 4331 args.append(self._parse_bitwise()) 4332 if self._match(TokenType.FOR): 4333 args.append(self._parse_bitwise()) 4334 4335 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4336 4337 def _parse_trim(self) -> exp.Trim: 4338 # https://www.w3resource.com/sql/character-functions/trim.php 4339 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4340 4341 position = None 4342 collation = None 4343 4344 if self._match_texts(self.TRIM_TYPES): 4345 position = self._prev.text.upper() 4346 4347 expression = self._parse_bitwise() 4348 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4349 this = self._parse_bitwise() 4350 else: 4351 this = expression 4352 expression = None 4353 4354 if self._match(TokenType.COLLATE): 4355 collation = self._parse_bitwise() 4356 4357 return self.expression( 4358 exp.Trim, this=this, position=position, expression=expression, collation=collation 4359 ) 4360 4361 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4362 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4363 4364 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4365 return self._parse_window(self._parse_id_var(), alias=True) 4366 4367 def _parse_respect_or_ignore_nulls( 4368 self, this: t.Optional[exp.Expression] 4369 ) -> t.Optional[exp.Expression]: 4370 if self._match_text_seq("IGNORE", "NULLS"): 4371 return self.expression(exp.IgnoreNulls, this=this) 4372 if self._match_text_seq("RESPECT", "NULLS"): 4373 return self.expression(exp.RespectNulls, this=this) 4374 return this 4375 4376 def _parse_window( 4377 self, this: t.Optional[exp.Expression], alias: bool = False 4378 ) -> t.Optional[exp.Expression]: 4379 if 
self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4380 self._match(TokenType.WHERE) 4381 this = self.expression( 4382 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4383 ) 4384 self._match_r_paren() 4385 4386 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4387 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4388 if self._match_text_seq("WITHIN", "GROUP"): 4389 order = self._parse_wrapped(self._parse_order) 4390 this = self.expression(exp.WithinGroup, this=this, expression=order) 4391 4392 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4393 # Some dialects choose to implement and some do not. 4394 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4395 4396 # There is some code above in _parse_lambda that handles 4397 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4398 4399 # The below changes handle 4400 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4401 4402 # Oracle allows both formats 4403 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4404 # and Snowflake chose to do the same for familiarity 4405 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4406 this = self._parse_respect_or_ignore_nulls(this) 4407 4408 # bigquery select from window x AS (partition by ...) 
4409 if alias: 4410 over = None 4411 self._match(TokenType.ALIAS) 4412 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4413 return this 4414 else: 4415 over = self._prev.text.upper() 4416 4417 if not self._match(TokenType.L_PAREN): 4418 return self.expression( 4419 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4420 ) 4421 4422 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4423 4424 first = self._match(TokenType.FIRST) 4425 if self._match_text_seq("LAST"): 4426 first = False 4427 4428 partition, order = self._parse_partition_and_order() 4429 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4430 4431 if kind: 4432 self._match(TokenType.BETWEEN) 4433 start = self._parse_window_spec() 4434 self._match(TokenType.AND) 4435 end = self._parse_window_spec() 4436 4437 spec = self.expression( 4438 exp.WindowSpec, 4439 kind=kind, 4440 start=start["value"], 4441 start_side=start["side"], 4442 end=end["value"], 4443 end_side=end["side"], 4444 ) 4445 else: 4446 spec = None 4447 4448 self._match_r_paren() 4449 4450 window = self.expression( 4451 exp.Window, 4452 this=this, 4453 partition_by=partition, 4454 order=order, 4455 spec=spec, 4456 alias=window_alias, 4457 over=over, 4458 first=first, 4459 ) 4460 4461 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4462 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4463 return self._parse_window(window, alias=alias) 4464 4465 return window 4466 4467 def _parse_partition_and_order( 4468 self, 4469 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4470 return self._parse_partition_by(), self._parse_order() 4471 4472 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4473 self._match(TokenType.BETWEEN) 4474 4475 return { 4476 "value": ( 4477 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4478 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4479 or self._parse_bitwise() 4480 ), 4481 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4482 } 4483 4484 def _parse_alias( 4485 self, this: t.Optional[exp.Expression], explicit: bool = False 4486 ) -> t.Optional[exp.Expression]: 4487 any_token = self._match(TokenType.ALIAS) 4488 4489 if explicit and not any_token: 4490 return this 4491 4492 if self._match(TokenType.L_PAREN): 4493 aliases = self.expression( 4494 exp.Aliases, 4495 this=this, 4496 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4497 ) 4498 self._match_r_paren(aliases) 4499 return aliases 4500 4501 alias = self._parse_id_var(any_token) 4502 4503 if alias: 4504 return self.expression(exp.Alias, this=this, alias=alias) 4505 4506 return this 4507 4508 def _parse_id_var( 4509 self, 4510 any_token: bool = True, 4511 tokens: t.Optional[t.Collection[TokenType]] = None, 4512 ) -> t.Optional[exp.Expression]: 4513 identifier = self._parse_identifier() 4514 4515 if identifier: 4516 return identifier 4517 4518 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4519 quoted = self._prev.token_type == TokenType.STRING 4520 return exp.Identifier(this=self._prev.text, quoted=quoted) 4521 4522 return None 4523 4524 def _parse_string(self) -> t.Optional[exp.Expression]: 4525 if self._match(TokenType.STRING): 4526 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4527 return self._parse_placeholder() 4528 4529 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4530 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4531 4532 def _parse_number(self) -> t.Optional[exp.Expression]: 4533 if self._match(TokenType.NUMBER): 4534 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4535 return self._parse_placeholder() 4536 4537 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4538 if self._match(TokenType.IDENTIFIER): 4539 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4540 return self._parse_placeholder() 4541 4542 def _parse_var( 4543 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4544 ) -> t.Optional[exp.Expression]: 4545 if ( 4546 (any_token and self._advance_any()) 4547 or self._match(TokenType.VAR) 4548 or (self._match_set(tokens) if tokens else False) 4549 ): 4550 return self.expression(exp.Var, this=self._prev.text) 4551 return self._parse_placeholder() 4552 4553 def _advance_any(self) -> t.Optional[Token]: 4554 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4555 self._advance() 4556 return self._prev 4557 return None 4558 4559 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4560 return self._parse_var() or self._parse_string() 4561 4562 def _parse_null(self) -> t.Optional[exp.Expression]: 4563 if self._match(TokenType.NULL): 4564 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4565 return self._parse_placeholder() 4566 4567 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4568 if self._match(TokenType.TRUE): 4569 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4570 if self._match(TokenType.FALSE): 4571 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4572 return self._parse_placeholder() 4573 4574 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4575 if self._match(TokenType.STAR): 4576 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4577 return self._parse_placeholder() 4578 4579 def _parse_parameter(self) -> exp.Parameter: 4580 wrapped = self._match(TokenType.L_BRACE) 4581 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4582 self._match(TokenType.R_BRACE) 4583 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4584 4585 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4586 if self._match_set(self.PLACEHOLDER_PARSERS): 4587 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4588 if placeholder: 4589 return placeholder 4590 self._advance(-1) 4591 return None 4592 4593 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4594 if not self._match(TokenType.EXCEPT): 4595 return None 4596 if self._match(TokenType.L_PAREN, advance=False): 4597 return self._parse_wrapped_csv(self._parse_column) 4598 return self._parse_csv(self._parse_column) 4599 4600 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4601 if not self._match(TokenType.REPLACE): 4602 return None 4603 if self._match(TokenType.L_PAREN, advance=False): 4604 return self._parse_wrapped_csv(self._parse_expression) 4605 return self._parse_expressions() 4606 4607 def _parse_csv( 4608 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4609 ) -> t.List[exp.Expression]: 4610 parse_result = parse_method() 4611 items = [parse_result] if parse_result is not None else [] 4612 4613 while self._match(sep): 4614 self._add_comments(parse_result) 4615 parse_result = parse_method() 4616 if parse_result is not None: 4617 items.append(parse_result) 4618 4619 return items 4620 4621 def _parse_tokens( 4622 self, parse_method: t.Callable, expressions: t.Dict 4623 ) -> t.Optional[exp.Expression]: 4624 this = parse_method() 4625 4626 while self._match_set(expressions): 4627 this = self.expression( 4628 
expressions[self._prev.token_type], 4629 this=this, 4630 comments=self._prev_comments, 4631 expression=parse_method(), 4632 ) 4633 4634 return this 4635 4636 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4637 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4638 4639 def _parse_wrapped_csv( 4640 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4641 ) -> t.List[exp.Expression]: 4642 return self._parse_wrapped( 4643 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4644 ) 4645 4646 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4647 wrapped = self._match(TokenType.L_PAREN) 4648 if not wrapped and not optional: 4649 self.raise_error("Expecting (") 4650 parse_result = parse_method() 4651 if wrapped: 4652 self._match_r_paren() 4653 return parse_result 4654 4655 def _parse_expressions(self) -> t.List[exp.Expression]: 4656 return self._parse_csv(self._parse_expression) 4657 4658 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4659 return self._parse_select() or self._parse_set_operations( 4660 self._parse_expression() if alias else self._parse_conjunction() 4661 ) 4662 4663 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4664 return self._parse_query_modifiers( 4665 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4666 ) 4667 4668 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4669 this = None 4670 if self._match_texts(self.TRANSACTION_KIND): 4671 this = self._prev.text 4672 4673 self._match_texts({"TRANSACTION", "WORK"}) 4674 4675 modes = [] 4676 while True: 4677 mode = [] 4678 while self._match(TokenType.VAR): 4679 mode.append(self._prev.text) 4680 4681 if mode: 4682 modes.append(" ".join(mode)) 4683 if not self._match(TokenType.COMMA): 4684 break 4685 4686 return self.expression(exp.Transaction, this=this, 
modes=modes) 4687 4688 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4689 chain = None 4690 savepoint = None 4691 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4692 4693 self._match_texts({"TRANSACTION", "WORK"}) 4694 4695 if self._match_text_seq("TO"): 4696 self._match_text_seq("SAVEPOINT") 4697 savepoint = self._parse_id_var() 4698 4699 if self._match(TokenType.AND): 4700 chain = not self._match_text_seq("NO") 4701 self._match_text_seq("CHAIN") 4702 4703 if is_rollback: 4704 return self.expression(exp.Rollback, savepoint=savepoint) 4705 4706 return self.expression(exp.Commit, chain=chain) 4707 4708 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4709 if not self._match_text_seq("ADD"): 4710 return None 4711 4712 self._match(TokenType.COLUMN) 4713 exists_column = self._parse_exists(not_=True) 4714 expression = self._parse_field_def() 4715 4716 if expression: 4717 expression.set("exists", exists_column) 4718 4719 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4720 if self._match_texts(("FIRST", "AFTER")): 4721 position = self._prev.text 4722 column_position = self.expression( 4723 exp.ColumnPosition, this=self._parse_column(), position=position 4724 ) 4725 expression.set("position", column_position) 4726 4727 return expression 4728 4729 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4730 drop = self._match(TokenType.DROP) and self._parse_drop() 4731 if drop and not isinstance(drop, exp.Command): 4732 drop.set("kind", drop.args.get("kind", "COLUMN")) 4733 return drop 4734 4735 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4736 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4737 return self.expression( 4738 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4739 ) 4740 4741 def _parse_add_constraint(self) -> exp.AddConstraint: 4742 this = None 4743 
kind = self._prev.token_type 4744 4745 if kind == TokenType.CONSTRAINT: 4746 this = self._parse_id_var() 4747 4748 if self._match_text_seq("CHECK"): 4749 expression = self._parse_wrapped(self._parse_conjunction) 4750 enforced = self._match_text_seq("ENFORCED") 4751 4752 return self.expression( 4753 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4754 ) 4755 4756 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4757 expression = self._parse_foreign_key() 4758 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4759 expression = self._parse_primary_key() 4760 else: 4761 expression = None 4762 4763 return self.expression(exp.AddConstraint, this=this, expression=expression) 4764 4765 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4766 index = self._index - 1 4767 4768 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4769 return self._parse_csv(self._parse_add_constraint) 4770 4771 self._retreat(index) 4772 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4773 return self._parse_csv(self._parse_field_def) 4774 4775 return self._parse_csv(self._parse_add_column) 4776 4777 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4778 self._match(TokenType.COLUMN) 4779 column = self._parse_field(any_token=True) 4780 4781 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4782 return self.expression(exp.AlterColumn, this=column, drop=True) 4783 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4784 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4785 4786 self._match_text_seq("SET", "DATA") 4787 return self.expression( 4788 exp.AlterColumn, 4789 this=column, 4790 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4791 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4792 using=self._match(TokenType.USING) and self._parse_conjunction(), 4793 ) 4794 4795 def _parse_alter_table_drop(self) -> 
t.List[exp.Expression]:
        # NOTE(review): this is the tail of a DROP-parsing method whose `def` line
        # precedes this chunk. Remember where we are so we can rewind if the
        # PARTITION branch does not apply.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            # DROP PARTITION [IF EXISTS] ... — possibly a comma-separated list.
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind past the consumed IF EXISTS and parse columns.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse the tail of ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement.

        Only ALTER TABLE is parsed structurally; anything else (or any trailing
        tokens we could not consume) falls back to an opaque exp.Command.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # The token just consumed names the alter action (ADD, DROP, RENAME, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build a structured AlterTable when every token was consumed;
            # otherwise fall through to the command fallback below.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # `source` is True only for WHEN ... BY SOURCE; BY TARGET and the
            # plain form both yield False.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * (e.g. Databricks) — insert all source columns.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * — update all columns from the source.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # Keep DELETE as a bare variable node carrying the keyword text.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else fall back to a Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> = <value> or <name> TO <value>.

        Args:
            kind: Optional qualifier such as "GLOBAL" or "SESSION"; those may
                also introduce a SET TRANSACTION, which is delegated.

        Returns:
            An exp.SetItem, or None (with the cursor rewound) if no assignment
            operator follows.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # `global` is a Python keyword, so it must be passed via **kwargs.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring a dialect-specific SET parser."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Not everything was consumed — rewind and keep the raw command text.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return an exp.Var for the first multi-word option that matches, else None."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap them in an opaque exp.Command.

        The command keyword (the text of `start`) becomes `this`; everything
        after it becomes `expression`.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property clause, e.g. ClickHouse LAYOUT/SOURCE(...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Inner parens hold key/value sub-properties; stop when neither parses.
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse RANGE(MIN <x> MAX <y>) or RANGE(MAX <y>); MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: `min`/`max` shadow the builtins here; local to this method only.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
        """Parse `<expr> for <x> in <iter> [if <cond>]` (array comprehension)."""
        expression = self._parse_column()
        self._match(TokenType.IN)
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming tokens against `trie` and return the parser
        registered in `parsers` for the longest key found, or None (cursor
        rewound) when nothing matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # NOTE(review): if the trie reports a prefix but the token stream ends,
        # `self._curr` is None on the next pass — presumably unreachable for the
        # registered keys; verify if new multi-word keys are added.
        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True and (optionally) advance if the current token matches.

        Comments buffered on the matched token are attached to `expression`.
        Returns None (falsy) on no match — callers rely only on truthiness.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Like _match, but accepts any token type in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Match two consecutive token types; advances past both when `advance`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token; records/raises a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token; records/raises a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Match the current token's text (case-insensitively) against `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Match a sequence of token texts; all-or-nothing (rewinds on failure)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
5121 5122 def _replace_columns_with_dots(self, this): 5123 if isinstance(this, exp.Dot): 5124 exp.replace_children(this, self._replace_columns_with_dots) 5125 elif isinstance(this, exp.Column): 5126 exp.replace_children(this, self._replace_columns_with_dots) 5127 table = this.args.get("table") 5128 this = ( 5129 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5130 ) 5131 5132 return this 5133 5134 def _replace_lambda( 5135 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5136 ) -> t.Optional[exp.Expression]: 5137 if not node: 5138 return node 5139 5140 for column in node.find_all(exp.Column): 5141 if column.parts[0].name in lambda_variables: 5142 dot_or_id = column.to_dot() if column.table else column.this 5143 parent = column.parent 5144 5145 while isinstance(parent, exp.Dot): 5146 if not isinstance(parent.parent, exp.Dot): 5147 parent.replace(dot_or_id) 5148 break 5149 parent = parent.parent 5150 else: 5151 if column is node: 5152 node = dot_or_id 5153 else: 5154 column.replace(dot_or_id) 5155 return node 5156 5157 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5158 return [ 5159 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5160 for value in values 5161 if value 5162 ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
905 def __init__( 906 self, 907 error_level: t.Optional[ErrorLevel] = None, 908 error_message_context: int = 100, 909 max_errors: int = 3, 910 ): 911 self.error_level = error_level or ErrorLevel.IMMEDIATE 912 self.error_message_context = error_message_context 913 self.max_errors = max_errors 914 self._tokenizer = self.TOKENIZER_CLASS() 915 self.reset()
927 def parse( 928 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 929 ) -> t.List[t.Optional[exp.Expression]]: 930 """ 931 Parses a list of tokens and returns a list of syntax trees, one tree 932 per parsed SQL statement. 933 934 Args: 935 raw_tokens: The list of tokens. 936 sql: The original SQL string, used to produce helpful debug messages. 937 938 Returns: 939 The list of the produced syntax trees. 940 """ 941 return self._parse( 942 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 943 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
945 def parse_into( 946 self, 947 expression_types: exp.IntoType, 948 raw_tokens: t.List[Token], 949 sql: t.Optional[str] = None, 950 ) -> t.List[t.Optional[exp.Expression]]: 951 """ 952 Parses a list of tokens into a given Expression type. If a collection of Expression 953 types is given instead, this method will try to parse the token list into each one 954 of them, stopping at the first for which the parsing succeeds. 955 956 Args: 957 expression_types: The expression type(s) to try and parse the token list into. 958 raw_tokens: The list of tokens. 959 sql: The original SQL string, used to produce helpful debug messages. 960 961 Returns: 962 The target Expression. 963 """ 964 errors = [] 965 for expression_type in ensure_list(expression_types): 966 parser = self.EXPRESSION_PARSERS.get(expression_type) 967 if not parser: 968 raise TypeError(f"No parser registered for {expression_type}") 969 970 try: 971 return self._parse(parser, raw_tokens, sql) 972 except ParseError as e: 973 e.errors[0]["into_expression"] = expression_type 974 errors.append(e) 975 976 raise ParseError( 977 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 978 errors=merge_errors(errors), 979 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds. If none of the types can be parsed, a ParseError that merges the individual errors is raised.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1016 def check_errors(self) -> None: 1017 """Logs or raises any found errors, depending on the chosen error level setting.""" 1018 if self.error_level == ErrorLevel.WARN: 1019 for error in self.errors: 1020 logger.error(str(error)) 1021 elif self.error_level == ErrorLevel.RAISE and self.errors: 1022 raise ParseError( 1023 concat_messages(self.errors, self.max_errors), 1024 errors=merge_errors(self.errors), 1025 )
Logs or raises any found errors, depending on the chosen error level setting.
1027 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1028 """ 1029 Appends an error in the list of recorded errors or raises it, depending on the chosen 1030 error level setting. 1031 """ 1032 token = token or self._curr or self._prev or Token.string("") 1033 start = token.start 1034 end = token.end + 1 1035 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1036 highlight = self.sql[start:end] 1037 end_context = self.sql[end : end + self.error_message_context] 1038 1039 error = ParseError.new( 1040 f"{message}. Line {token.line}, Col: {token.col}.\n" 1041 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1042 description=message, 1043 line=token.line, 1044 col=token.col, 1045 start_context=start_context, 1046 highlight=highlight, 1047 end_context=end_context, 1048 ) 1049 1050 if self.error_level == ErrorLevel.IMMEDIATE: 1051 raise error 1052 1053 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1055 def expression( 1056 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1057 ) -> E: 1058 """ 1059 Creates a new, validated Expression. 1060 1061 Args: 1062 exp_class: The expression class to instantiate. 1063 comments: An optional list of comments to attach to the expression. 1064 kwargs: The arguments to set for the expression along with their respective values. 1065 1066 Returns: 1067 The target expression. 1068 """ 1069 instance = exp_class(**kwargs) 1070 instance.add_comments(comments) if comments else self._add_comments(instance) 1071 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1078 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1079 """ 1080 Validates an Expression, making sure that all its mandatory arguments are set. 1081 1082 Args: 1083 expression: The expression to validate. 1084 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1085 1086 Returns: 1087 The validated expression. 1088 """ 1089 if self.error_level != ErrorLevel.IGNORE: 1090 for error_message in expression.error_messages(args): 1091 self.raise_error(error_message) 1092 1093 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.