# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
    # Default mapping of SQL function names to AST builders. The comprehension
    # seeds it from every exp.Func subclass's sql_names(); the literal entries
    # below layer parser-level special cases on top.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # NOTE(review): args[1]/args[0] are deliberately swapped — presumably to
        # match GLOB's subject/pattern order; confirm against the calling dialects.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST(x AS TEXT) then keep the first 10 characters (a date-sized prefix).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate here — looks like
    # a deliberate fallback, but verify against the expressions module.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Types that can wrap other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    # Every token that can begin a data type.
    TYPE_TOKENS = {
        TokenType.BIT, TokenType.BOOLEAN,
        TokenType.TINYINT, TokenType.UTINYINT, TokenType.SMALLINT, TokenType.USMALLINT,
        TokenType.INT, TokenType.UINT, TokenType.BIGINT, TokenType.UBIGINT,
        TokenType.INT128, TokenType.UINT128, TokenType.INT256, TokenType.UINT256,
        TokenType.FLOAT, TokenType.DOUBLE,
        TokenType.CHAR, TokenType.NCHAR, TokenType.VARCHAR, TokenType.NVARCHAR,
        TokenType.TEXT, TokenType.MEDIUMTEXT, TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB, TokenType.LONGBLOB,
        TokenType.BINARY, TokenType.VARBINARY,
        TokenType.JSON, TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME, TokenType.TIMESTAMP, TokenType.TIMESTAMPTZ, TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME, TokenType.DATETIME64, TokenType.DATE,
        TokenType.INT4RANGE, TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE, TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE, TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE, TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE, TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE, TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL, TokenType.BIGDECIMAL,
        TokenType.UUID, TokenType.GEOGRAPHY, TokenType.GEOMETRY,
        TokenType.HLLSKETCH, TokenType.HSTORE,
        TokenType.PSEUDO_TYPE, TokenType.SUPER,
        TokenType.SERIAL, TokenType.SMALLSERIAL, TokenType.BIGSERIAL,
        TokenType.XML, TokenType.UNIQUEIDENTIFIER, TokenType.USERDEFINED,
        TokenType.MONEY, TokenType.SMALLMONEY, TokenType.ROWVERSION,
        TokenType.IMAGE, TokenType.VARIANT, TokenType.OBJECT, TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens that introduce a subquery predicate, mapped to their AST node.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,  # SOME is a synonym of ANY
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Database-level objects that can follow CREATE.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI, TokenType.APPLY, TokenType.ASC, TokenType.AUTO_INCREMENT,
        TokenType.BEGIN, TokenType.CACHE, TokenType.CASE, TokenType.COLLATE,
        TokenType.COMMAND, TokenType.COMMENT, TokenType.COMMIT, TokenType.CONSTRAINT,
        TokenType.DEFAULT, TokenType.DELETE, TokenType.DESC, TokenType.DESCRIBE,
        TokenType.DICTIONARY, TokenType.DIV, TokenType.END, TokenType.EXECUTE,
        TokenType.ESCAPE, TokenType.FALSE, TokenType.FIRST, TokenType.FILTER,
        TokenType.FORMAT, TokenType.FULL, TokenType.IF, TokenType.IS,
        TokenType.ISNULL, TokenType.INTERVAL, TokenType.KEEP, TokenType.LEFT,
        TokenType.LOAD, TokenType.MERGE, TokenType.NATURAL, TokenType.NEXT,
        TokenType.OFFSET, TokenType.ORDINALITY, TokenType.OVERWRITE, TokenType.PARTITION,
        TokenType.PERCENT, TokenType.PIVOT, TokenType.PRAGMA, TokenType.RANGE,
        TokenType.REFERENCES, TokenType.RIGHT, TokenType.ROW, TokenType.ROWS,
        TokenType.SEMI, TokenType.SET, TokenType.SETTINGS, TokenType.SHOW,
        TokenType.TEMPORARY, TokenType.TOP, TokenType.TRUE, TokenType.UNIQUE,
        TokenType.UNPIVOT, TokenType.UPDATE, TokenType.VOLATILE, TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END terminates interval parsing, so it cannot name an interval variable.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens that would be ambiguous as a trailing table alias are excluded.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be immediately followed by a call-style argument list.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE, TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP, TokenType.CURRENT_TIME, TokenType.CURRENT_USER,
        TokenType.FILTER, TokenType.FIRST, TokenType.FORMAT, TokenType.GLOB,
        TokenType.IDENTIFIER, TokenType.INDEX, TokenType.ISNULL, TokenType.ILIKE,
        TokenType.LIKE, TokenType.MERGE, TokenType.OFFSET, TokenType.PRIMARY_KEY,
        TokenType.RANGE, TokenType.REPLACE, TokenType.ROW, TokenType.UNNEST,
        TokenType.VAR, TokenType.LEFT, TokenType.RIGHT, TokenType.DATE,
        TokenType.DATETIME, TokenType.TABLE, TokenType.TIMESTAMP, TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator tables, in decreasing precedence order below.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }
    JOIN_HINTS: t.Set[str] = set()

    # Arrow-style lambda constructs: (x, y) -> expr and name => value kwargs.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that combine a column with a following expression.
    # DOT is handled specially by the column parser, hence the None.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> entry-point parser; used by parse_into.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Leading statement token -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A bare FROM is sugar for SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal tokens -> AST leaves.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # ":1" / ":name" style placeholders; only valid when a number or var follows.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # CREATE/DDL property keyword -> property parser. Keys may be multi-word.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is treated as a synonym of IMMUTABLE.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> constraint parser.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON only forms a constraint when followed by UPDATE (ON UPDATE <fn>).
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint kinds that may appear in a schema without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
    # Function-like constructs parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function names whose argument lists need bespoke parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query modifiers (joins, WHERE, LIMIT, ...) keyed by the arg name they fill.
    # iter(parse_fn, None) keeps calling the parser until it returns None.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can carry query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags; subclasses override these.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )
    # Autofilled: copied onto the class from dialect/tokenizer settings.
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        # Clears all per-parse state so the instance can be reused.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If a requested type has no registered parser.
            ParseError: If every candidate type fails to parse.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type that was being attempted.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface them all, chained to the last.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons and parses one tree per chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not open a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
errors=merge_errors(self.errors), 951 ) 952 953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error) 980 981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance) 998 999 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1000 if expression and self._prev_comments: 1001 expression.add_comments(self._prev_comments) 1002 self._prev_comments = None 1003 1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 
    def _advance(self, times: int = 1) -> None:
        """
        Moves the parser cursor `times` tokens forward (negative values move it
        backward, as used by `_retreat`) and refreshes the cached current,
        next, and previous token references.

        Args:
            times: Number of positions to move the cursor by.
        """
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            # At (or before) the first token there is no previous token.
            self._prev = None
            self._prev_comments = None
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL expression, optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: keep the bare TTL expression.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # A SET <aggregates> list is only consumed after a GROUP BY clause.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parses a CREATE [OR REPLACE] statement into an exp.Create node, falling
        back to a generic exp.Command when the created object kind is unknown.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        # CREATE OR REPLACE — some dialects tokenize so that `start` is REPLACE.
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the creatable kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into
            # the single `properties` node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        # Snowflake-style CLONE clause, optionally qualified with AT/BEFORE
        # time-travel parameters.
        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
1301 return self._parse_character_set(default=True) 1302 1303 if self._match_text_seq("COMPOUND", "SORTKEY"): 1304 return self._parse_sortkey(compound=True) 1305 1306 if self._match_text_seq("SQL", "SECURITY"): 1307 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1308 1309 assignment = self._match_pair( 1310 TokenType.VAR, TokenType.EQ, advance=False 1311 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1312 1313 if assignment: 1314 key = self._parse_var_or_string() 1315 self._match(TokenType.EQ) 1316 return self.expression(exp.Property, this=key, value=self._parse_column()) 1317 1318 return None 1319 1320 def _parse_stored(self) -> exp.FileFormatProperty: 1321 self._match(TokenType.ALIAS) 1322 1323 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1324 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1325 1326 return self.expression( 1327 exp.FileFormatProperty, 1328 this=self.expression( 1329 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1330 ) 1331 if input_format or output_format 1332 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1333 ) 1334 1335 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1336 self._match(TokenType.EQ) 1337 self._match(TokenType.ALIAS) 1338 return self.expression(exp_class, this=self._parse_field()) 1339 1340 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1341 properties = [] 1342 while True: 1343 if before: 1344 prop = self._parse_property_before() 1345 else: 1346 prop = self._parse_property() 1347 1348 if not prop: 1349 break 1350 for p in ensure_list(prop): 1351 properties.append(p) 1352 1353 if properties: 1354 return self.expression(exp.Properties, expressions=properties) 1355 1356 return None 1357 1358 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1359 
return self.expression( 1360 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1361 ) 1362 1363 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1364 if self._index >= 2: 1365 pre_volatile_token = self._tokens[self._index - 2] 1366 else: 1367 pre_volatile_token = None 1368 1369 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1370 return exp.VolatileProperty() 1371 1372 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1373 1374 def _parse_with_property( 1375 self, 1376 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1377 self._match(TokenType.WITH) 1378 if self._match(TokenType.L_PAREN, advance=False): 1379 return self._parse_wrapped_csv(self._parse_property) 1380 1381 if self._match_text_seq("JOURNAL"): 1382 return self._parse_withjournaltable() 1383 1384 if self._match_text_seq("DATA"): 1385 return self._parse_withdata(no=False) 1386 elif self._match_text_seq("NO", "DATA"): 1387 return self._parse_withdata(no=True) 1388 1389 if not self._next: 1390 return None 1391 1392 return self._parse_withisolatedloading() 1393 1394 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1395 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1396 self._match(TokenType.EQ) 1397 1398 user = self._parse_id_var() 1399 self._match(TokenType.PARAMETER) 1400 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1401 1402 if not user or not host: 1403 return None 1404 1405 return exp.DefinerProperty(this=f"{user}@{host}") 1406 1407 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1408 self._match(TokenType.TABLE) 1409 self._match(TokenType.EQ) 1410 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1411 1412 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1413 return self.expression(exp.LogProperty, no=no) 1414 1415 def 
_parse_journal(self, **kwargs) -> exp.JournalProperty: 1416 return self.expression(exp.JournalProperty, **kwargs) 1417 1418 def _parse_checksum(self) -> exp.ChecksumProperty: 1419 self._match(TokenType.EQ) 1420 1421 on = None 1422 if self._match(TokenType.ON): 1423 on = True 1424 elif self._match_text_seq("OFF"): 1425 on = False 1426 1427 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1428 1429 def _parse_cluster(self) -> t.Optional[exp.Cluster]: 1430 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1431 1432 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1433 if not self._match_text_seq("GRANTS"): 1434 self._retreat(self._index - 1) 1435 return None 1436 1437 return self.expression(exp.CopyGrantsProperty) 1438 1439 def _parse_freespace(self) -> exp.FreespaceProperty: 1440 self._match(TokenType.EQ) 1441 return self.expression( 1442 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1443 ) 1444 1445 def _parse_mergeblockratio( 1446 self, no: bool = False, default: bool = False 1447 ) -> exp.MergeBlockRatioProperty: 1448 if self._match(TokenType.EQ): 1449 return self.expression( 1450 exp.MergeBlockRatioProperty, 1451 this=self._parse_number(), 1452 percent=self._match(TokenType.PERCENT), 1453 ) 1454 1455 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1456 1457 def _parse_datablocksize( 1458 self, 1459 default: t.Optional[bool] = None, 1460 minimum: t.Optional[bool] = None, 1461 maximum: t.Optional[bool] = None, 1462 ) -> exp.DataBlocksizeProperty: 1463 self._match(TokenType.EQ) 1464 size = self._parse_number() 1465 1466 units = None 1467 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1468 units = self._prev.text 1469 1470 return self.expression( 1471 exp.DataBlocksizeProperty, 1472 size=size, 1473 units=units, 1474 default=default, 1475 minimum=minimum, 1476 maximum=maximum, 1477 ) 1478 
1479 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1480 self._match(TokenType.EQ) 1481 always = self._match_text_seq("ALWAYS") 1482 manual = self._match_text_seq("MANUAL") 1483 never = self._match_text_seq("NEVER") 1484 default = self._match_text_seq("DEFAULT") 1485 1486 autotemp = None 1487 if self._match_text_seq("AUTOTEMP"): 1488 autotemp = self._parse_schema() 1489 1490 return self.expression( 1491 exp.BlockCompressionProperty, 1492 always=always, 1493 manual=manual, 1494 never=never, 1495 default=default, 1496 autotemp=autotemp, 1497 ) 1498 1499 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1500 no = self._match_text_seq("NO") 1501 concurrent = self._match_text_seq("CONCURRENT") 1502 self._match_text_seq("ISOLATED", "LOADING") 1503 for_all = self._match_text_seq("FOR", "ALL") 1504 for_insert = self._match_text_seq("FOR", "INSERT") 1505 for_none = self._match_text_seq("FOR", "NONE") 1506 return self.expression( 1507 exp.IsolatedLoadingProperty, 1508 no=no, 1509 concurrent=concurrent, 1510 for_all=for_all, 1511 for_insert=for_insert, 1512 for_none=for_none, 1513 ) 1514 1515 def _parse_locking(self) -> exp.LockingProperty: 1516 if self._match(TokenType.TABLE): 1517 kind = "TABLE" 1518 elif self._match(TokenType.VIEW): 1519 kind = "VIEW" 1520 elif self._match(TokenType.ROW): 1521 kind = "ROW" 1522 elif self._match_text_seq("DATABASE"): 1523 kind = "DATABASE" 1524 else: 1525 kind = None 1526 1527 if kind in ("DATABASE", "TABLE", "VIEW"): 1528 this = self._parse_table_parts() 1529 else: 1530 this = None 1531 1532 if self._match(TokenType.FOR): 1533 for_or_in = "FOR" 1534 elif self._match(TokenType.IN): 1535 for_or_in = "IN" 1536 else: 1537 for_or_in = None 1538 1539 if self._match_text_seq("ACCESS"): 1540 lock_type = "ACCESS" 1541 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1542 lock_type = "EXCLUSIVE" 1543 elif self._match_text_seq("SHARE"): 1544 lock_type = "SHARE" 1545 elif self._match_text_seq("READ"): 1546 
lock_type = "READ" 1547 elif self._match_text_seq("WRITE"): 1548 lock_type = "WRITE" 1549 elif self._match_text_seq("CHECKSUM"): 1550 lock_type = "CHECKSUM" 1551 else: 1552 lock_type = None 1553 1554 override = self._match_text_seq("OVERRIDE") 1555 1556 return self.expression( 1557 exp.LockingProperty, 1558 this=this, 1559 kind=kind, 1560 for_or_in=for_or_in, 1561 lock_type=lock_type, 1562 override=override, 1563 ) 1564 1565 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1566 if self._match(TokenType.PARTITION_BY): 1567 return self._parse_csv(self._parse_conjunction) 1568 return [] 1569 1570 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1571 self._match(TokenType.EQ) 1572 return self.expression( 1573 exp.PartitionedByProperty, 1574 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1575 ) 1576 1577 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1578 if self._match_text_seq("AND", "STATISTICS"): 1579 statistics = True 1580 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1581 statistics = False 1582 else: 1583 statistics = None 1584 1585 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1586 1587 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1588 if self._match_text_seq("PRIMARY", "INDEX"): 1589 return exp.NoPrimaryIndexProperty() 1590 return None 1591 1592 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1593 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1594 return exp.OnCommitProperty() 1595 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1596 return exp.OnCommitProperty(delete=True) 1597 return None 1598 1599 def _parse_distkey(self) -> exp.DistKeyProperty: 1600 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1601 1602 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1603 table = self._parse_table(schema=True) 1604 1605 options = [] 1606 while 
    def _parse_returns(self) -> exp.ReturnsProperty:
        """
        Parses the RETURNS clause of a function definition: either a scalar
        type or a TABLE shape (angle-bracketed or parenthesized column list).
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> — angle-bracketed struct columns.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # RETURNS TABLE (col type, ...) — parsed as a schema.
                value = self._parse_schema(exp.var("TABLE"))
        else:
            # Plain scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """
        Parses the ON CONFLICT (Postgres-style) or ON DUPLICATE KEY
        (MySQL-style) clause of an INSERT statement.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint, or a list of key columns.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
self._match(TokenType.FORMAT): 1731 return None 1732 return self._parse_row_format() 1733 1734 def _parse_row_format( 1735 self, match_row: bool = False 1736 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1737 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1738 return None 1739 1740 if self._match_text_seq("SERDE"): 1741 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1742 1743 self._match_text_seq("DELIMITED") 1744 1745 kwargs = {} 1746 1747 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1748 kwargs["fields"] = self._parse_string() 1749 if self._match_text_seq("ESCAPED", "BY"): 1750 kwargs["escaped"] = self._parse_string() 1751 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1752 kwargs["collection_items"] = self._parse_string() 1753 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1754 kwargs["map_keys"] = self._parse_string() 1755 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1756 kwargs["lines"] = self._parse_string() 1757 if self._match_text_seq("NULL", "DEFINED", "AS"): 1758 kwargs["null"] = self._parse_string() 1759 1760 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1761 1762 def _parse_load(self) -> exp.LoadData | exp.Command: 1763 if self._match_text_seq("DATA"): 1764 local = self._match_text_seq("LOCAL") 1765 self._match_text_seq("INPATH") 1766 inpath = self._parse_string() 1767 overwrite = self._match(TokenType.OVERWRITE) 1768 self._match_pair(TokenType.INTO, TokenType.TABLE) 1769 1770 return self.expression( 1771 exp.LoadData, 1772 this=self._parse_table(schema=True), 1773 local=local, 1774 overwrite=overwrite, 1775 inpath=inpath, 1776 partition=self._parse_partition(), 1777 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1778 serde=self._match_text_seq("SERDE") and self._parse_string(), 1779 ) 1780 return self._parse_as_command(self._prev) 1781 1782 def 
_parse_delete(self) -> exp.Delete: 1783 self._match(TokenType.FROM) 1784 1785 return self.expression( 1786 exp.Delete, 1787 this=self._parse_table(), 1788 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1789 where=self._parse_where(), 1790 returning=self._parse_returning(), 1791 limit=self._parse_limit(), 1792 ) 1793 1794 def _parse_update(self) -> exp.Update: 1795 return self.expression( 1796 exp.Update, 1797 **{ # type: ignore 1798 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1799 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1800 "from": self._parse_from(modifiers=True), 1801 "where": self._parse_where(), 1802 "returning": self._parse_returning(), 1803 "limit": self._parse_limit(), 1804 }, 1805 ) 1806 1807 def _parse_uncache(self) -> exp.Uncache: 1808 if not self._match(TokenType.TABLE): 1809 self.raise_error("Expecting TABLE after UNCACHE") 1810 1811 return self.expression( 1812 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1813 ) 1814 1815 def _parse_cache(self) -> exp.Cache: 1816 lazy = self._match_text_seq("LAZY") 1817 self._match(TokenType.TABLE) 1818 table = self._parse_table(schema=True) 1819 1820 options = [] 1821 if self._match_text_seq("OPTIONS"): 1822 self._match_l_paren() 1823 k = self._parse_string() 1824 self._match(TokenType.EQ) 1825 v = self._parse_string() 1826 options = [k, v] 1827 self._match_r_paren() 1828 1829 self._match(TokenType.ALIAS) 1830 return self.expression( 1831 exp.Cache, 1832 this=table, 1833 lazy=lazy, 1834 options=options, 1835 expression=self._parse_select(nested=True), 1836 ) 1837 1838 def _parse_partition(self) -> t.Optional[exp.Partition]: 1839 if not self._match(TokenType.PARTITION): 1840 return None 1841 1842 return self.expression( 1843 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1844 ) 1845 1846 def _parse_value(self) -> exp.Tuple: 1847 if 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parses a SELECT statement — or a CTE-prefixed statement, a
        parenthesized subquery, or a VALUES clause — including any trailing
        set operations (UNION etc.).

        Args:
            nested: Whether a parenthesized nested select is acceptable here.
            table: Whether a bare table reference is acceptable in parentheses.
            parse_subquery_alias: Whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # The statement can't carry a WITH clause; keep the CTE node so
                # error recovery still returns something meaningful.
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # "(FROM t)" shorthand: treated as SELECT * FROM t.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """
        Parses trailing query modifiers (joins, where, group, order, limit, ...)
        and attaches them to `this`, when `this` is a modifiable expression.
        Non-modifiable (or None) inputs are returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # A LIMIT parser may bundle an OFFSET in its args;
                        # hoist it onto the parent as a proper Offset node.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this
unlogged = self._match_text_seq("UNLOGGED") 2040 self._match(TokenType.TABLE) 2041 2042 return self.expression( 2043 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2044 ) 2045 2046 def _parse_from( 2047 self, modifiers: bool = False, skip_from_token: bool = False 2048 ) -> t.Optional[exp.From]: 2049 if not skip_from_token and not self._match(TokenType.FROM): 2050 return None 2051 2052 comments = self._prev_comments 2053 this = self._parse_table() 2054 2055 return self.expression( 2056 exp.From, 2057 comments=comments, 2058 this=self._parse_query_modifiers(this) if modifiers else this, 2059 ) 2060 2061 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2062 if not self._match(TokenType.MATCH_RECOGNIZE): 2063 return None 2064 2065 self._match_l_paren() 2066 2067 partition = self._parse_partition_by() 2068 order = self._parse_order() 2069 measures = ( 2070 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2071 ) 2072 2073 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2074 rows = exp.var("ONE ROW PER MATCH") 2075 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2076 text = "ALL ROWS PER MATCH" 2077 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2078 text += f" SHOW EMPTY MATCHES" 2079 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2080 text += f" OMIT EMPTY MATCHES" 2081 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2082 text += f" WITH UNMATCHED ROWS" 2083 rows = exp.var(text) 2084 else: 2085 rows = None 2086 2087 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2088 text = "AFTER MATCH SKIP" 2089 if self._match_text_seq("PAST", "LAST", "ROW"): 2090 text += f" PAST LAST ROW" 2091 elif self._match_text_seq("TO", "NEXT", "ROW"): 2092 text += f" TO NEXT ROW" 2093 elif self._match_text_seq("TO", "FIRST"): 2094 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2095 elif self._match_text_seq("TO", "LAST"): 2096 text += f" TO LAST 
{self._advance_any().text}" # type: ignore 2097 after = exp.var(text) 2098 else: 2099 after = None 2100 2101 if self._match_text_seq("PATTERN"): 2102 self._match_l_paren() 2103 2104 if not self._curr: 2105 self.raise_error("Expecting )", self._curr) 2106 2107 paren = 1 2108 start = self._curr 2109 2110 while self._curr and paren > 0: 2111 if self._curr.token_type == TokenType.L_PAREN: 2112 paren += 1 2113 if self._curr.token_type == TokenType.R_PAREN: 2114 paren -= 1 2115 2116 end = self._prev 2117 self._advance() 2118 2119 if paren > 0: 2120 self.raise_error("Expecting )", self._curr) 2121 2122 pattern = exp.var(self._find_sql(start, end)) 2123 else: 2124 pattern = None 2125 2126 define = ( 2127 self._parse_csv( 2128 lambda: self.expression( 2129 exp.Alias, 2130 alias=self._parse_id_var(any_token=True), 2131 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2132 ) 2133 ) 2134 if self._match_text_seq("DEFINE") 2135 else None 2136 ) 2137 2138 self._match_r_paren() 2139 2140 return self.expression( 2141 exp.MatchRecognize, 2142 partition_by=partition, 2143 order=order, 2144 measures=measures, 2145 rows=rows, 2146 after=after, 2147 pattern=pattern, 2148 define=define, 2149 alias=self._parse_table_alias(), 2150 ) 2151 2152 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2153 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2154 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2155 2156 if outer_apply or cross_apply: 2157 this = self._parse_select(table=True) 2158 view = None 2159 outer = not cross_apply 2160 elif self._match(TokenType.LATERAL): 2161 this = self._parse_select(table=True) 2162 view = self._match(TokenType.VIEW) 2163 outer = self._match(TokenType.OUTER) 2164 else: 2165 return None 2166 2167 if not this: 2168 this = self._parse_function() or self._parse_id_var(any_token=False) 2169 while self._match(TokenType.DOT): 2170 this = exp.Dot( 2171 this=this, 2172 expression=self._parse_function() or 
self._parse_id_var(any_token=False), 2173 ) 2174 2175 if view: 2176 table = self._parse_id_var(any_token=False) 2177 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2178 table_alias: t.Optional[exp.TableAlias] = self.expression( 2179 exp.TableAlias, this=table, columns=columns 2180 ) 2181 elif isinstance(this, exp.Subquery) and this.alias: 2182 # Ensures parity between the Subquery's and the Lateral's "alias" args 2183 table_alias = this.args["alias"].copy() 2184 else: 2185 table_alias = self._parse_table_alias() 2186 2187 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2188 2189 def _parse_join_parts( 2190 self, 2191 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2192 return ( 2193 self._match_set(self.JOIN_METHODS) and self._prev, 2194 self._match_set(self.JOIN_SIDES) and self._prev, 2195 self._match_set(self.JOIN_KINDS) and self._prev, 2196 ) 2197 2198 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2199 if self._match(TokenType.COMMA): 2200 return self.expression(exp.Join, this=self._parse_table()) 2201 2202 index = self._index 2203 method, side, kind = self._parse_join_parts() 2204 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2205 join = self._match(TokenType.JOIN) 2206 2207 if not skip_join_token and not join: 2208 self._retreat(index) 2209 kind = None 2210 method = None 2211 side = None 2212 2213 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2214 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2215 2216 if not skip_join_token and not join and not outer_apply and not cross_apply: 2217 return None 2218 2219 if outer_apply: 2220 side = Token(TokenType.LEFT, "LEFT") 2221 2222 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2223 2224 if method: 2225 kwargs["method"] = method.text 2226 if side: 2227 kwargs["side"] = side.text 2228 if kind: 2229 
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; when `index` is given, parse only its ON <table> part."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component (catalog, db or table name) of a dotted table path."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name like catalog.db.table."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like construct: lateral, unnest, values, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # dialect flag controls whether TABLESAMPLE comes before or after the alias
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            # in column-only dialects the alias names the column, not the table
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES clause, possibly parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or
self._parse_table_alias() 2425 ) 2426 2427 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2428 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2429 as_modifier and self._match_text_seq("USING", "SAMPLE") 2430 ): 2431 return None 2432 2433 bucket_numerator = None 2434 bucket_denominator = None 2435 bucket_field = None 2436 percent = None 2437 rows = None 2438 size = None 2439 seed = None 2440 2441 kind = ( 2442 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2443 ) 2444 method = self._parse_var(tokens=(TokenType.ROW,)) 2445 2446 self._match(TokenType.L_PAREN) 2447 2448 num = self._parse_number() 2449 2450 if self._match_text_seq("BUCKET"): 2451 bucket_numerator = self._parse_number() 2452 self._match_text_seq("OUT", "OF") 2453 bucket_denominator = bucket_denominator = self._parse_number() 2454 self._match(TokenType.ON) 2455 bucket_field = self._parse_field() 2456 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2457 percent = num 2458 elif self._match(TokenType.ROWS): 2459 rows = num 2460 else: 2461 size = num 2462 2463 self._match(TokenType.R_PAREN) 2464 2465 if self._match(TokenType.L_PAREN): 2466 method = self._parse_var() 2467 seed = self._match(TokenType.COMMA) and self._parse_number() 2468 self._match_r_paren() 2469 elif self._match_texts(("SEED", "REPEATABLE")): 2470 seed = self._parse_wrapped(self._parse_number) 2471 2472 return self.expression( 2473 exp.TableSample, 2474 method=method, 2475 bucket_numerator=bucket_numerator, 2476 bucket_denominator=bucket_denominator, 2477 bucket_field=bucket_field, 2478 percent=percent, 2479 rows=rows, 2480 size=size, 2481 seed=seed, 2482 kind=kind, 2483 ) 2484 2485 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2486 return list(iter(self._parse_pivot, None)) 2487 2488 # https://duckdb.org/docs/sql/statements/pivot 2489 def _parse_simplified_pivot(self) -> exp.Pivot: 2490 def _parse_on() -> 
t.Optional[exp.Expression]: 2491 this = self._parse_bitwise() 2492 return self._parse_in(this) if self._match(TokenType.IN) else this 2493 2494 this = self._parse_table() 2495 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2496 using = self._match(TokenType.USING) and self._parse_csv( 2497 lambda: self._parse_alias(self._parse_function()) 2498 ) 2499 group = self._parse_group() 2500 return self.expression( 2501 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2502 ) 2503 2504 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2505 index = self._index 2506 2507 if self._match(TokenType.PIVOT): 2508 unpivot = False 2509 elif self._match(TokenType.UNPIVOT): 2510 unpivot = True 2511 else: 2512 return None 2513 2514 expressions = [] 2515 field = None 2516 2517 if not self._match(TokenType.L_PAREN): 2518 self._retreat(index) 2519 return None 2520 2521 if unpivot: 2522 expressions = self._parse_csv(self._parse_column) 2523 else: 2524 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2525 2526 if not expressions: 2527 self.raise_error("Failed to parse PIVOT's aggregation list") 2528 2529 if not self._match(TokenType.FOR): 2530 self.raise_error("Expecting FOR") 2531 2532 value = self._parse_column() 2533 2534 if not self._match(TokenType.IN): 2535 self.raise_error("Expecting IN") 2536 2537 field = self._parse_in(value, alias=True) 2538 2539 self._match_r_paren() 2540 2541 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2542 2543 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2544 pivot.set("alias", self._parse_table_alias()) 2545 2546 if not unpivot: 2547 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2548 2549 columns: t.List[exp.Expression] = [] 2550 for fld in pivot.args["field"].expressions: 2551 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2552 for name in 
names: 2553 if self.PREFIXED_PIVOT_COLUMNS: 2554 name = f"{name}_{field_name}" if name else field_name 2555 else: 2556 name = f"{field_name}_{name}" if name else field_name 2557 2558 columns.append(exp.to_identifier(name)) 2559 2560 pivot.set("columns", columns) 2561 2562 return pivot 2563 2564 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2565 return [agg.alias for agg in aggregations] 2566 2567 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2568 if not skip_where_token and not self._match(TokenType.WHERE): 2569 return None 2570 2571 return self.expression( 2572 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2573 ) 2574 2575 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2576 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2577 return None 2578 2579 elements = defaultdict(list) 2580 2581 while True: 2582 expressions = self._parse_csv(self._parse_conjunction) 2583 if expressions: 2584 elements["expressions"].extend(expressions) 2585 2586 grouping_sets = self._parse_grouping_sets() 2587 if grouping_sets: 2588 elements["grouping_sets"].extend(grouping_sets) 2589 2590 rollup = None 2591 cube = None 2592 totals = None 2593 2594 with_ = self._match(TokenType.WITH) 2595 if self._match(TokenType.ROLLUP): 2596 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2597 elements["rollup"].extend(ensure_list(rollup)) 2598 2599 if self._match(TokenType.CUBE): 2600 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2601 elements["cube"].extend(ensure_list(cube)) 2602 2603 if self._match_text_seq("TOTALS"): 2604 totals = True 2605 elements["totals"] = True # type: ignore 2606 2607 if not (grouping_sets or rollup or cube or totals): 2608 break 2609 2610 return self.expression(exp.Group, **elements) # type: ignore 2611 2612 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2613 if 
not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) keyed by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term, normalizing ASC/DESC and NULLS FIRST/LAST."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # make the dialect's implicit null ordering explicit so it transpiles
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH clauses; returns `this` unchanged if absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION / EXCEPT / INTERSECT chains starting from `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) ->
t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse IS [NOT] NULL/TRUE/FALSE and IS [NOT] DISTINCT FROM."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # not an IS expression after all; rewind past IS [NOT]
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of an IN predicate: subquery, list, unnest or field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse BETWEEN <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix for LIKE-style expressions."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as LT LT / GT GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast form, or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # bare type name wasn't a cast; rewind and reparse as a column
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the 10 in DECIMAL(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type expression (possibly nested/parameterized)."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type
2962 if type_token == TokenType.PSEUDO_TYPE: 2963 return self.expression(exp.PseudoType, this=self._prev.text) 2964 2965 nested = type_token in self.NESTED_TYPE_TOKENS 2966 is_struct = type_token == TokenType.STRUCT 2967 expressions = None 2968 maybe_func = False 2969 2970 if self._match(TokenType.L_PAREN): 2971 if is_struct: 2972 expressions = self._parse_csv(self._parse_struct_types) 2973 elif nested: 2974 expressions = self._parse_csv( 2975 lambda: self._parse_types(check_func=check_func, schema=schema) 2976 ) 2977 elif type_token in self.ENUM_TYPE_TOKENS: 2978 expressions = self._parse_csv(self._parse_primary) 2979 else: 2980 expressions = self._parse_csv(self._parse_type_size) 2981 2982 if not expressions or not self._match(TokenType.R_PAREN): 2983 self._retreat(index) 2984 return None 2985 2986 maybe_func = True 2987 2988 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2989 this = exp.DataType( 2990 this=exp.DataType.Type.ARRAY, 2991 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2992 nested=True, 2993 ) 2994 2995 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2996 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2997 2998 return this 2999 3000 if self._match(TokenType.L_BRACKET): 3001 self._retreat(index) 3002 return None 3003 3004 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3005 if nested and self._match(TokenType.LT): 3006 if is_struct: 3007 expressions = self._parse_csv(self._parse_struct_types) 3008 else: 3009 expressions = self._parse_csv( 3010 lambda: self._parse_types(check_func=check_func, schema=schema) 3011 ) 3012 3013 if not self._match(TokenType.GT): 3014 self.raise_error("Expecting >") 3015 3016 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3017 values = self._parse_csv(self._parse_conjunction) 3018 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3019 3020 value: t.Optional[exp.Expression] = None 3021 if 
type_token in self.TIMESTAMPS: 3022 if self._match_text_seq("WITH", "TIME", "ZONE"): 3023 maybe_func = False 3024 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3025 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3026 maybe_func = False 3027 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3028 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3029 maybe_func = False 3030 elif type_token == TokenType.INTERVAL: 3031 unit = self._parse_var() 3032 3033 if not unit: 3034 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3035 else: 3036 value = self.expression(exp.Interval, unit=unit) 3037 3038 if maybe_func and check_func: 3039 index2 = self._index 3040 peek = self._parse_string() 3041 3042 if not peek: 3043 self._retreat(index) 3044 return None 3045 3046 self._retreat(index2) 3047 3048 if value: 3049 return value 3050 3051 return exp.DataType( 3052 this=exp.DataType.Type[type_token.value.upper()], 3053 expressions=expressions, 3054 nested=nested, 3055 values=values, 3056 prefix=prefix, 3057 ) 3058 3059 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3060 this = self._parse_type() or self._parse_id_var() 3061 self._match(TokenType.COLON) 3062 return self._parse_column_def(this) 3063 3064 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3065 if not self._match_text_seq("AT", "TIME", "ZONE"): 3066 return this 3067 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3068 3069 def _parse_column(self) -> t.Optional[exp.Expression]: 3070 this = self._parse_field() 3071 if isinstance(this, exp.Identifier): 3072 this = self.expression(exp.Column, this=this) 3073 elif not this: 3074 return self._parse_bracket(this) 3075 return self._parse_column_ops(this) 3076 3077 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3078 this = 
        self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `::` must be followed by a type name.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift table/db/catalog one level: a.b.c -> Column(c, table=b, db=a).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, 0.N number, or parenthesized expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments =
self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier, in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including no-paren functions like CURRENT_DATE."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not
anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown (or forced-anonymous) functions are kept as Anonymous nodes.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # NOTE(review): `kind` is unused in this body; presumably consumed by dialect
        # overrides — confirm before removing.
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); plain Identifier if no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and
self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> ...`), a DISTINCT list, or a select/expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; discard any errors it produced.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if
isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Bare identifier with no type and no constraints: not a column def.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) column constraints."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint,
this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — computed column form.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones fall through to SCHEMA_UNNAMED_CONSTRAINTS."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this =
self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` and dispatch to its parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, DEFERRABLE, ...) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # `on` is the event keyword (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table, optional column list, options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (...) REFERENCES ... with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as a column constraint or a table-level key with columns."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options =
self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts/arrays and `{...}` struct literals following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize the subscript for the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call `IF(...)` or as `IF ... THEN ... [ELSE ...] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma also separates the two arguments."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. Snowflake-style CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(expr AS <temporal type> FORMAT 'fmt') becomes StrToDate/StrToTime.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregations across dialect variants."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: also treat NULL == NULL as a match, per DECODE semantics.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] key [:|VALUE] value` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL handling and key-uniqueness clauses."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif
self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(cols) AGAINST(str [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list: name, type, path, AS JSON flag.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS; supports both `POSITION(a IN b)` and comma forms."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
# NOTE(review): the `def` line of _parse_trim sits above this chunk; the
# signature below is reconstructed from the visible body — confirm against
# the full file.
def _parse_trim(self) -> exp.Trim:
    # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
    position = None
    collation = None

    if self._match_texts(self.TRIM_TYPES):
        position = self._prev.text.upper()

    expression = self._parse_bitwise()
    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        this = self._parse_bitwise()
    else:
        this, expression = expression, None

    if self._match(TokenType.COLLATE):
        collation = self._parse_bitwise()

    return self.expression(
        exp.Trim, this=this, position=position, expression=expression, collation=collation
    )

def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parse `WINDOW name AS (...)[, ...]`; falsy when the keyword is absent."""
    return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

def _parse_named_window(self) -> t.Optional[exp.Expression]:
    """Parse a single named window definition (`name AS (spec)`)."""
    return self._parse_window(self._parse_id_var(), alias=True)

def _parse_respect_or_ignore_nulls(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Wrap `this` when an IGNORE NULLS / RESPECT NULLS modifier follows."""
    for keyword, wrapper in (("IGNORE", exp.IgnoreNulls), ("RESPECT", exp.RespectNulls)):
        if self._match_text_seq(keyword, "NULLS"):
            return self.expression(wrapper, this=this)
    return this

def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    """Parse FILTER / WITHIN GROUP / OVER window syntax trailing `this`."""
    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        this = self.expression(exp.Filter, this=this, expression=self._parse_where())
        self._match_r_paren()

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup, this=this, expression=order)

    # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER;
    # dialect support varies:
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
    #
    # The FIRST_VALUE(col IGNORE|RESPECT NULLS) form is handled earlier, in
    # _parse_lambda; this call handles FIRST_VALUE(col) IGNORE|RESPECT NULLS
    # OVER ...  Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity:
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    this = self._parse_respect_or_ignore_nulls(this)

    # BigQuery: SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        return this
    else:
        over = self._prev.text.upper()

    if not self._match(TokenType.L_PAREN):
        # Bare `OVER name` — reference to a named window.
        return self.expression(
            exp.Window, this=this, alias=self._parse_id_var(False), over=over
        )

    win_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    first = self._match(TokenType.FIRST)
    if self._match_text_seq("LAST"):
        first = False

    partition = self._parse_partition_by()
    order = self._parse_order()
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

    if kind:
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()
        self._match(TokenType.AND)
        end = self._parse_window_spec()

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=start["value"],
            start_side=start["side"],
            end=end["value"],
            end_side=end["side"],
        )
    else:
        spec = None

    self._match_r_paren()

    return self.expression(
        exp.Window,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=win_alias,
        over=over,
        first=first,
    )

def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
    """Parse one boundary of a frame spec (e.g. `UNBOUNDED PRECEDING`)."""
    self._match(TokenType.BETWEEN)

    if self._match_text_seq("UNBOUNDED"):
        value: t.Optional[str | exp.Expression] = "UNBOUNDED"
    elif self._match_text_seq("CURRENT", "ROW"):
        value = "CURRENT ROW"
    else:
        value = self._parse_bitwise()

    return {
        "value": value,
        "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
    }

def _parse_alias(
    self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
    """Parse `[AS] alias` or an `(a, b, ...)` alias list after `this`.

    When `explicit` is True, only an alias introduced by AS is accepted.
    """
    any_token = self._match(TokenType.ALIAS)

    if explicit and not any_token:
        return this

    if self._match(TokenType.L_PAREN):
        aliases = self.expression(
            exp.Aliases,
            this=this,
            expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
        )
        self._match_r_paren(aliases)
        return aliases

    alias = self._parse_id_var(any_token)
    return self.expression(exp.Alias, this=this, alias=alias) if alias else this
def _parse_id_var(
    self,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    """Parse an identifier-like token into an Identifier node."""
    identifier = self._parse_identifier()
    if identifier:
        return identifier

    if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
        return exp.Identifier(
            this=self._prev.text,
            quoted=self._prev.token_type == TokenType.STRING,
        )

    return None

def _parse_string(self) -> t.Optional[exp.Expression]:
    """Parse a string literal, falling back to a placeholder."""
    if self._match(TokenType.STRING):
        return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
    return self._parse_placeholder()

def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
    """Parse a string literal and wrap it as a quoted identifier."""
    return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

def _parse_number(self) -> t.Optional[exp.Expression]:
    """Parse a numeric literal, falling back to a placeholder."""
    if self._match(TokenType.NUMBER):
        return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
    return self._parse_placeholder()

def _parse_identifier(self) -> t.Optional[exp.Expression]:
    """Parse a quoted identifier, falling back to a placeholder."""
    if self._match(TokenType.IDENTIFIER):
        return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
    return self._parse_placeholder()

def _parse_var(
    self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.Expression]:
    """Parse a VAR token (or any allowed token) into a Var node."""
    matched = (
        (any_token and self._advance_any())
        or self._match(TokenType.VAR)
        or (self._match_set(tokens) if tokens else False)
    )
    if matched:
        return self.expression(exp.Var, this=self._prev.text)
    return self._parse_placeholder()

def _advance_any(self) -> t.Optional[Token]:
    """Consume and return the current token unless it is a reserved keyword."""
    if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
        self._advance()
        return self._prev
    return None

def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
    """Parse either a Var or a string literal."""
    return self._parse_var() or self._parse_string()

def _parse_null(self) -> t.Optional[exp.Expression]:
    """Parse a NULL literal."""
    if self._match(TokenType.NULL):
        return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
    return None

def _parse_boolean(self) -> t.Optional[exp.Expression]:
    """Parse a TRUE/FALSE literal."""
    for token_type in (TokenType.TRUE, TokenType.FALSE):
        if self._match(token_type):
            return self.PRIMARY_PARSERS[token_type](self, self._prev)
    return None

def _parse_star(self) -> t.Optional[exp.Expression]:
    """Parse a `*` token, including EXCEPT/REPLACE modifiers."""
    if self._match(TokenType.STAR):
        return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
    return None

def _parse_parameter(self) -> exp.Parameter:
    """Parse a parameter, optionally wrapped in braces (e.g. `@{x}`)."""
    wrapped = self._match(TokenType.L_BRACE)
    this = self._parse_var() or self._parse_identifier() or self._parse_primary()
    self._match(TokenType.R_BRACE)
    return self.expression(exp.Parameter, this=this, wrapped=wrapped)

def _parse_placeholder(self) -> t.Optional[exp.Expression]:
    """Parse a placeholder token; rewinds when the sub-parser declines."""
    if self._match_set(self.PLACEHOLDER_PARSERS):
        placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
        if placeholder:
            return placeholder
        self._advance(-1)
    return None

def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parse the column list of a star EXCEPT modifier."""
    if not self._match(TokenType.EXCEPT):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_column)
    return self._parse_csv(self._parse_column)

def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parse the expression list of a star REPLACE modifier."""
    if not self._match(TokenType.REPLACE):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_expression)
    return self._parse_csv(self._parse_expression)
def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[t.Optional[exp.Expression]]:
    """Parse a `sep`-separated list of items, dropping None results."""
    parsed = parse_method()
    items = [] if parsed is None else [parsed]

    while self._match(sep):
        # Attach comments found around the separator to the previous item.
        self._add_comments(parsed)
        parsed = parse_method()
        if parsed is not None:
            items.append(parsed)

    return items

def _parse_tokens(
    self, parse_method: t.Callable, expressions: t.Dict
) -> t.Optional[exp.Expression]:
    """Left-fold binary operators from `expressions` over `parse_method`."""
    this = parse_method()

    while self._match_set(expressions):
        this = self.expression(
            expressions[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )

    return this

def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
    """Parse `(id1, id2, ...)`; parens may be omitted when `optional`."""
    return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

def _parse_wrapped_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
) -> t.List[t.Optional[exp.Expression]]:
    """Parse a parenthesized `sep`-separated list."""
    return self._parse_wrapped(
        lambda: self._parse_csv(parse_method, sep=sep), optional=optional
    )

def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
    """Run `parse_method` inside parens; error unless parens are optional."""
    wrapped = self._match(TokenType.L_PAREN)
    if not wrapped and not optional:
        self.raise_error("Expecting (")
    result = parse_method()
    if wrapped:
        self._match_r_paren()
    return result

def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a full SELECT or, failing that, a plain expression."""
    fallback = self._parse_expression if alias else self._parse_conjunction
    return self._parse_select() or self._parse_set_operations(fallback())

def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    """Parse the SELECT used inside DDL (e.g. CREATE TABLE ... AS SELECT)."""
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction:
    """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode, ...]."""
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts({"TRANSACTION", "WORK"})

    modes = []
    while True:
        mode = []
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
    chain = None
    savepoint = None
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts({"TRANSACTION", "WORK"})

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)
    return self.expression(exp.Commit, chain=chain)

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def>."""
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_column_def(self._parse_field(any_token=True))

    if expression:
        expression.set("exists", exists_column)

    # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
    if self._match_texts(("FIRST", "AFTER")):
        position = self._prev.text
        column_position = self.expression(
            exp.ColumnPosition, this=self._parse_column(), position=position
        )
        expression.set("position", column_position)

    return expression

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse ALTER TABLE ... DROP [COLUMN], defaulting kind to COLUMN."""
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    """Parse ALTER TABLE ... DROP PARTITION (...)[, PARTITION (...)]."""
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_add_constraint(self) -> exp.AddConstraint:
    """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY clauses."""
    this = None
    kind = self._prev.token_type

    if kind == TokenType.CONSTRAINT:
        this = self._parse_id_var()

        if self._match_text_seq("CHECK"):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

    if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
        expression = self._parse_foreign_key()
    elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
        expression = self._parse_primary_key()
    else:
        expression = None

    return self.expression(exp.AddConstraint, this=this, expression=expression)

def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
    """Dispatch ALTER TABLE ADD to constraint or column parsing."""
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS):
        return self._parse_csv(self._parse_add_constraint)

    self._retreat(index)
    return self._parse_csv(self._parse_add_column)

def _parse_alter_table_alter(self) -> exp.AlterColumn:
    """Parse the ALTER [COLUMN] variants of an ALTER TABLE statement."""
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

    self._match_text_seq("SET", "DATA")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._match_text_seq("TYPE") and self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_conjunction(),
    )
def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
    """Dispatch ALTER TABLE DROP to partition or column parsing."""
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> exp.RenameTable:
    """Parse ALTER TABLE ... RENAME TO <table>."""
    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse ALTER TABLE; fall back to a raw Command when unsupported."""
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    this = self._parse_table(schema=True)

    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))

        # Only produce an AlterTable node when the whole input was consumed.
        if not self._curr:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
            )
    return self._parse_as_command(start)

def _parse_merge_action(self) -> t.Optional[exp.Expression]:
    """Parse the INSERT / UPDATE / DELETE action that follows THEN."""
    if self._match(TokenType.INSERT):
        star = self._parse_star()
        if star:
            return self.expression(exp.Insert, this=star)
        return self.expression(
            exp.Insert,
            this=self._parse_value(),
            expression=self._match(TokenType.VALUES) and self._parse_value(),
        )
    if self._match(TokenType.UPDATE):
        star = self._parse_star()
        if star:
            return self.expression(exp.Update, expressions=star)
        return self.expression(
            exp.Update,
            expressions=self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
        )
    if self._match(TokenType.DELETE):
        return self.expression(exp.Var, this=self._prev.text)
    return None

def _parse_merge(self) -> exp.Merge:
    """Parse MERGE INTO ... USING ... ON ... WHEN ... THEN ... ."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    whens = []
    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=self._parse_merge_action(),
            )
        )

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=whens,
    )

def _parse_show(self) -> t.Optional[exp.Expression]:
    """Parse SHOW via dialect-specific sub-parsers, else a generic Show."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    self._advance()
    return self.expression(exp.Show, this=self._prev.text.upper())

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse `name = value` / `name TO value` inside a SET statement."""
    index = self._index

    if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()

    if not self._match_texts(("=", "TO")):
        # Not an assignment after all: rewind so the caller can retry.
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    return self.expression(
        exp.SetItem,
        this=self.expression(exp.EQ, this=left, expression=right),
        kind=kind,
    )

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse SET [GLOBAL | SESSION] TRANSACTION <characteristic, ...>."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parse one SET item via sub-parsers, else a bare assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self) -> exp.Set | exp.Command:
    """Parse SET; fall back to a raw Command when tokens remain."""
    index = self._index
    set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
    """Return a Var for the first (possibly multi-word) matching option."""
    for option in options:
        if self._match_text_seq(*option.split(" ")):
            return exp.var(option)
    return None

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume the rest of the statement as an opaque Command node."""
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse a dictionary property of the form KIND(key value, ...)."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )
def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse a dictionary RANGE(MIN x MAX y) / RANGE(MAX y) clause."""
    self._match_l_paren()
    if self._match_text_seq("MIN"):
        min_value = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max_value = self._parse_var() or self._parse_primary()
    else:
        # No explicit MIN: the single value is the max, min defaults to 0.
        max_value = self._parse_var() or self._parse_primary()
        min_value = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min_value, max=max_value)

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Greedily match upcoming tokens against `trie`; rewind on failure."""
    if not self._curr:
        return None

    index = self._index
    consumed = []
    while True:
        # The current token might itself be multiple words.
        word = self._curr.text.upper()
        consumed.append(word)
        self._advance()
        result, trie = in_trie(trie, word.split(" "))
        if result == TrieResult.FAILED:
            break
        if result == TrieResult.EXISTS:
            return parsers[" ".join(consumed)]

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """Consume the current token if it has `token_type`."""
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    """Consume the current token if its type is in `types`."""
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Consume the next two tokens if they match the given pair."""
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Require an opening paren, raising a ParseError otherwise."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Require a closing paren, raising a ParseError otherwise."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Consume the current token if its upper-cased text is in `texts`."""
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return False

def _match_text_seq(self, *texts, advance=True):
    """Consume a run of tokens matching `texts` in order; rewind on failure."""
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return False

    if not advance:
        self._retreat(index)

    return True

@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
    ...

@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    ...
def _replace_columns_with_dots(self, this):
    """Recursively rewrite Column/Identifier nodes as Dot/Var chains."""
    if isinstance(this, exp.Dot):
        exp.replace_children(this, self._replace_columns_with_dots)
    elif isinstance(this, exp.Column):
        exp.replace_children(this, self._replace_columns_with_dots)
        table = this.args.get("table")
        if table:
            this = self.expression(exp.Dot, this=table, expression=this.this)
        else:
            this = self.expression(exp.Var, this=this.name)
    elif isinstance(this, exp.Identifier):
        this = self.expression(exp.Var, this=this.name)

    return this

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """Rewrite columns named after lambda parameters into plain identifiers."""
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name not in lambda_variables:
            continue

        dot_or_id = column.to_dot() if column.table else column.this
        parent = column.parent

        # Replace at the outermost Dot that wraps this column, if any.
        while isinstance(parent, exp.Dot):
            if not isinstance(parent.parent, exp.Dot):
                parent.replace(dot_or_id)
                break
            parent = parent.parent
        else:
            if column is node:
                node = dot_or_id
            else:
                column.replace(dot_or_id)

    return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from an alternating key/value argument list.

    A single star argument yields a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for idx in range(0, len(args), 2):
        keys.append(args[idx])
        values.append(args[idx + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
    # Tokens naming the side of a join (LEFT/RIGHT/FULL).
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # Tokens naming the kind of a join (INNER/OUTER/CROSS/SEMI/ANTI).
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Join hint keywords; empty by default, dialects may extend this.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda-syntax parsers keyed by the arrow token: `->` builds an exp.Lambda
    # (replacing references to the bound names), `=>` builds an exp.Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column expression. DOT is handled
    # specially (None), `::` casts, and the arrow/hash operators build JSON(B)
    # extraction nodes; PLACEHOLDER (`?`) maps to JSONB containment.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps target Expression types to the parser method that produces them;
    # used by parse_into to parse a token list into a specific node type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement-level dispatch: the first token of a statement selects the
    # parser method. A bare FROM becomes `SELECT * FROM ...`.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix unary operators and the expression node they wrap.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token parsers; each receives the matched token (or `_`
    # when the token itself carries no payload) and builds the literal node.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder syntaxes (`?`, `@param`, `:name`/`:1`).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, IS, LIKE-family, ...) applied to
    # an already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers keyed by the (upper-cased) property keyword, as
    # matched by _parse_property / _parse_property_before.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint parsers keyed by the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action parsers keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that can appear in a schema without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with bespoke argument grammars that can't use the generic
    # argument-list parsing (CAST, EXTRACT, TRIM, ...).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier clause parsers, keyed by the arg name they populate.
    # `iter(fn, None)` repeatedly calls fn until it returns None, collecting
    # consecutive joins/laterals.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement item parsers keyed by scope/kind keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty by default, dialects may extend this.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Per-type literal parsers; empty by default, dialects may extend this.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a DDL statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may directly precede VOLATILE in a CREATE statement; see
    # _parse_volatile_property.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords for INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside a CLONE clause's AT/BEFORE specification.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Tokens that introduce a table index hint (FORCE/IGNORE/USE INDEX).
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST failures raise (exp.Cast) instead of yielding NULL
    # (exp.TryCast); see COLUMN_OPERATORS[TokenType.DCOLON] and FUNCTION_PARSERS.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    # Whether CONVERT takes the target type as its first argument.
    CONVERT_TYPE_FIRST = False

    # Whether PIVOT output columns are prefixed; dialect-specific flag.
    PREFIXED_PIVOT_COLUMNS = False
    # Whether string literals inside PIVOT are treated as identifiers.
    IDENTIFY_PIVOT_STRINGS = False

    # Whether LOG(b, x) takes the base as the first argument; see _parse_logarithm.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # All candidate types failed: raise a combined error, chained to the last one.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: splits the token stream on semicolons into per-statement
        # chunks, then runs parse_method over each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens after a successful parse mean the statement
            # wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escapes in the message.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Without explicit comments, attach any comments pending from the last token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers the previous token's comments onto the expression (once).
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the SQL substring covered by the given start/end tokens.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward, refreshing _curr/_next/_prev and the
        # previous token's comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back (or forward) to an absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wraps the previous keyword and the rest of the statement
        # in an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses the target table of a TO property (e.g. ClickHouse materialized views).
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: expression plus an optional DELETE / RECOMPRESS /
            # TO DISK / TO VOLUME action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Top-level entry point per statement: dispatch on the first token,
        # falling back to commands and then plain expressions/selects.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> ...; unknown kinds
        # fall back to an opaque Command.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Consumes IF [NOT] EXISTS, returning a truthy value when matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges newly-parsed properties into the running collection.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # CLONE <table> [AT|BEFORE (TIMESTAMP|OFFSET|STATEMENT => expr)]
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifiers that may precede the property keyword; only the
        # truthy ones are forwarded to the property parser.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses a single DDL property via PROPERTY_PARSERS, a few multi-word
        # special cases, or a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED [AS] [INPUTFORMAT <s> OUTPUTFORMAT <s> | <format>].
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Generic `<KEYWORD> [= | AS] <value>` property.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties until one fails to parse.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE directly after CREATE/REPLACE/UNIQUE is a table property;
        # elsewhere it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        # Dispatches the various WITH-prefixed property forms.
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # Parses DEFINER = user@host (host may be given as `%`).
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # Parses CHECKSUM = ON | OFF | DEFAULT.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back off the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With `=` it carries an explicit ratio; otherwise only NO/DEFAULT flags.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse `DATABLOCKSIZE [= <number> [BYTES|KBYTES|KILOBYTES]]`.

        The DEFAULT/MINIMUM/MAXIMUM qualifiers are matched by the caller and
        passed in as flags (see `_parse_property_before`, which collects them).
        """
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        # Units keyword is optional; remember the exact spelling that matched.
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse `BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]`.

        The four mode flags are mutually exclusive in valid input; each
        `_match_text_seq` consumes its keyword only if present.
        """
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            # AUTOTEMP takes a parenthesized column/spec list, parsed as a schema.
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`.

        Called from `_parse_with_property` after WITH has been consumed.
        """
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: target kind, optional table name,
        FOR/IN, a lock type keyword, and an optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the other kinds may name one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            # Both spellings are normalized to EXCLUSIVE.
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse an optional `PARTITION BY <expr>, ...` clause; [] if absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse the value of a `PARTITIONED BY [=] ...` property:
        either a column schema or a bracketed/plain field expression."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the tail of `WITH [NO] DATA [AND [NO] STATISTICS]`.

        `no` reflects whether the caller already consumed NO before DATA.
        """
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse the tail of `NO PRIMARY INDEX`; None if it doesn't match."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse the tail of `ON COMMIT PRESERVE|DELETE ROWS`; None otherwise."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse `DISTKEY (<identifier>)`."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option> ...]`.

        Returns None (leaving the cursor wherever it stopped) if an
        INCLUDING/EXCLUDING keyword is not followed by an identifier.
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse `[COMPOUND] SORTKEY (<id>, ...)`; the caller supplies `compound`."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <name>`; `default` marks DEFAULT CHARACTER SET."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either a scalar type or
        `TABLE <schema>` / `TABLE < struct-fields >` (angle-bracket form)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE <col type, ...> — struct-style field list.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse `DESCRIBE [<creatable kind>] <table>`."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (OVERWRITE, DIRECTORY, OR <alternative>,
        partitions, REPLACE WHERE, ON CONFLICT/DUPLICATE and RETURNING)."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [row format] ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE/IGNORE/... (sqlite-style alternatives)
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ... DO NOTHING|UPDATE SET ...` (postgres-style)
        or `ON DUPLICATE KEY UPDATE ...` (mysql-style); None if neither."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse an optional `RETURNING <col>, ...` clause."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the tail of `ROW FORMAT ...` when ROW was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive-style row format: `SERDE '<class>'` or
        `DELIMITED [FIELDS|ESCAPED|COLLECTION ITEMS|MAP KEYS|LINES|NULL ...]`.

        Args:
            match_row: when True, also require the leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and order-dependent as written here.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        anything else after LOAD falls back to an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement: target table, USING list, WHERE,
        RETURNING and LIMIT."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target, SET assignments, FROM, WHERE,
        RETURNING and LIMIT."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single 'key' = 'value' pair is consumed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse an optional `PARTITION (<expr>, ...)` clause."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statements, SELECT cores,
        parenthesized subqueries (when `nested`/`table`), and VALUES lists.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table reference (instead of a select) inside parens.
            parse_subquery_alias: whether a parenthesized subquery may take an alias.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # e.g. DuckDB's `(FROM tbl)` shorthand for SELECT * FROM tbl —
                # TODO confirm dialect intent against callers.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a `WITH [RECURSIVE] cte [, cte ...]` prefix; None if absent.

        Args:
            skip_with_token: when True, assume WITH was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: `alias [(cols)] AS (<statement>)`.

        Raises a parse error when the alias is missing.
        """
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] alias [(col, ...)]`; None when neither part is present.

        Args:
            alias_tokens: token types allowed as the alias identifier
                (defaults to TABLE_ALIAS_TOKENS).
        """
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Roll back past the L_PAREN if it wasn't a column list after all.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, consuming pivots and (optionally)
        a trailing alias; None when `this` is None."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach query modifiers (joins, where, group, order, limit, ...) to
        `this` when it's a modifiable node; otherwise return it untouched."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT x, y style packs an offset into the Limit args;
                        # lift it out into a dedicated Offset node.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this
2024 def _parse_hint(self) -> t.Optional[exp.Hint]: 2025 if self._match(TokenType.HINT): 2026 hints = self._parse_csv(self._parse_function) 2027 2028 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2029 self.raise_error("Expected */ after HINT") 2030 2031 return self.expression(exp.Hint, expressions=hints) 2032 2033 return None 2034 2035 def _parse_into(self) -> t.Optional[exp.Into]: 2036 if not self._match(TokenType.INTO): 2037 return None 2038 2039 temp = self._match(TokenType.TEMPORARY) 2040 unlogged = self._match_text_seq("UNLOGGED") 2041 self._match(TokenType.TABLE) 2042 2043 return self.expression( 2044 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2045 ) 2046 2047 def _parse_from( 2048 self, modifiers: bool = False, skip_from_token: bool = False 2049 ) -> t.Optional[exp.From]: 2050 if not skip_from_token and not self._match(TokenType.FROM): 2051 return None 2052 2053 comments = self._prev_comments 2054 this = self._parse_table() 2055 2056 return self.expression( 2057 exp.From, 2058 comments=comments, 2059 this=self._parse_query_modifiers(this) if modifiers else this, 2060 ) 2061 2062 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2063 if not self._match(TokenType.MATCH_RECOGNIZE): 2064 return None 2065 2066 self._match_l_paren() 2067 2068 partition = self._parse_partition_by() 2069 order = self._parse_order() 2070 measures = ( 2071 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2072 ) 2073 2074 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2075 rows = exp.var("ONE ROW PER MATCH") 2076 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2077 text = "ALL ROWS PER MATCH" 2078 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2079 text += f" SHOW EMPTY MATCHES" 2080 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2081 text += f" OMIT EMPTY MATCHES" 2082 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2083 text += f" WITH 
UNMATCHED ROWS" 2084 rows = exp.var(text) 2085 else: 2086 rows = None 2087 2088 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2089 text = "AFTER MATCH SKIP" 2090 if self._match_text_seq("PAST", "LAST", "ROW"): 2091 text += f" PAST LAST ROW" 2092 elif self._match_text_seq("TO", "NEXT", "ROW"): 2093 text += f" TO NEXT ROW" 2094 elif self._match_text_seq("TO", "FIRST"): 2095 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2096 elif self._match_text_seq("TO", "LAST"): 2097 text += f" TO LAST {self._advance_any().text}" # type: ignore 2098 after = exp.var(text) 2099 else: 2100 after = None 2101 2102 if self._match_text_seq("PATTERN"): 2103 self._match_l_paren() 2104 2105 if not self._curr: 2106 self.raise_error("Expecting )", self._curr) 2107 2108 paren = 1 2109 start = self._curr 2110 2111 while self._curr and paren > 0: 2112 if self._curr.token_type == TokenType.L_PAREN: 2113 paren += 1 2114 if self._curr.token_type == TokenType.R_PAREN: 2115 paren -= 1 2116 2117 end = self._prev 2118 self._advance() 2119 2120 if paren > 0: 2121 self.raise_error("Expecting )", self._curr) 2122 2123 pattern = exp.var(self._find_sql(start, end)) 2124 else: 2125 pattern = None 2126 2127 define = ( 2128 self._parse_csv( 2129 lambda: self.expression( 2130 exp.Alias, 2131 alias=self._parse_id_var(any_token=True), 2132 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2133 ) 2134 ) 2135 if self._match_text_seq("DEFINE") 2136 else None 2137 ) 2138 2139 self._match_r_paren() 2140 2141 return self.expression( 2142 exp.MatchRecognize, 2143 partition_by=partition, 2144 order=order, 2145 measures=measures, 2146 rows=rows, 2147 after=after, 2148 pattern=pattern, 2149 define=define, 2150 alias=self._parse_table_alias(), 2151 ) 2152 2153 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2154 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2155 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2156 2157 if outer_apply or 
cross_apply: 2158 this = self._parse_select(table=True) 2159 view = None 2160 outer = not cross_apply 2161 elif self._match(TokenType.LATERAL): 2162 this = self._parse_select(table=True) 2163 view = self._match(TokenType.VIEW) 2164 outer = self._match(TokenType.OUTER) 2165 else: 2166 return None 2167 2168 if not this: 2169 this = self._parse_function() or self._parse_id_var(any_token=False) 2170 while self._match(TokenType.DOT): 2171 this = exp.Dot( 2172 this=this, 2173 expression=self._parse_function() or self._parse_id_var(any_token=False), 2174 ) 2175 2176 if view: 2177 table = self._parse_id_var(any_token=False) 2178 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2179 table_alias: t.Optional[exp.TableAlias] = self.expression( 2180 exp.TableAlias, this=table, columns=columns 2181 ) 2182 elif isinstance(this, exp.Subquery) and this.alias: 2183 # Ensures parity between the Subquery's and the Lateral's "alias" args 2184 table_alias = this.args["alias"].copy() 2185 else: 2186 table_alias = self._parse_table_alias() 2187 2188 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2189 2190 def _parse_join_parts( 2191 self, 2192 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2193 return ( 2194 self._match_set(self.JOIN_METHODS) and self._prev, 2195 self._match_set(self.JOIN_SIDES) and self._prev, 2196 self._match_set(self.JOIN_KINDS) and self._prev, 2197 ) 2198 2199 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2200 if self._match(TokenType.COMMA): 2201 return self.expression(exp.Join, this=self._parse_table()) 2202 2203 index = self._index 2204 method, side, kind = self._parse_join_parts() 2205 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2206 join = self._match(TokenType.JOIN) 2207 2208 if not skip_join_token and not join: 2209 self._retreat(index) 2210 kind = None 2211 method = None 2212 side = None 2213 2214 
outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2215 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2216 2217 if not skip_join_token and not join and not outer_apply and not cross_apply: 2218 return None 2219 2220 if outer_apply: 2221 side = Token(TokenType.LEFT, "LEFT") 2222 2223 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2224 2225 if method: 2226 kwargs["method"] = method.text 2227 if side: 2228 kwargs["side"] = side.text 2229 if kind: 2230 kwargs["kind"] = kind.text 2231 if hint: 2232 kwargs["hint"] = hint 2233 2234 if self._match(TokenType.ON): 2235 kwargs["on"] = self._parse_conjunction() 2236 elif self._match(TokenType.USING): 2237 kwargs["using"] = self._parse_wrapped_id_vars() 2238 2239 return self.expression(exp.Join, **kwargs) 2240 2241 def _parse_index( 2242 self, 2243 index: t.Optional[exp.Expression] = None, 2244 ) -> t.Optional[exp.Index]: 2245 if index: 2246 unique = None 2247 primary = None 2248 amp = None 2249 2250 self._match(TokenType.ON) 2251 self._match(TokenType.TABLE) # hive 2252 table = self._parse_table_parts(schema=True) 2253 else: 2254 unique = self._match(TokenType.UNIQUE) 2255 primary = self._match_text_seq("PRIMARY") 2256 amp = self._match_text_seq("AMP") 2257 2258 if not self._match(TokenType.INDEX): 2259 return None 2260 2261 index = self._parse_id_var() 2262 table = None 2263 2264 using = self._parse_field() if self._match(TokenType.USING) else None 2265 2266 if self._match(TokenType.L_PAREN, advance=False): 2267 columns = self._parse_wrapped_csv(self._parse_ordered) 2268 else: 2269 columns = None 2270 2271 return self.expression( 2272 exp.Index, 2273 this=index, 2274 table=table, 2275 using=using, 2276 columns=columns, 2277 unique=unique, 2278 primary=primary, 2279 amp=amp, 2280 partition_by=self._parse_partition_by(), 2281 ) 2282 2283 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2284 hints: t.List[exp.Expression] = [] 2285 if 
self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2286 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2287 hints.append( 2288 self.expression( 2289 exp.WithTableHint, 2290 expressions=self._parse_csv( 2291 lambda: self._parse_function() or self._parse_var(any_token=True) 2292 ), 2293 ) 2294 ) 2295 self._match_r_paren() 2296 else: 2297 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2298 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2299 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2300 2301 self._match_texts({"INDEX", "KEY"}) 2302 if self._match(TokenType.FOR): 2303 hint.set("target", self._advance_any() and self._prev.text.upper()) 2304 2305 hint.set("expressions", self._parse_wrapped_id_vars()) 2306 hints.append(hint) 2307 2308 return hints or None 2309 2310 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2311 return ( 2312 (not schema and self._parse_function(optional_parens=False)) 2313 or self._parse_id_var(any_token=False) 2314 or self._parse_string_as_identifier() 2315 or self._parse_placeholder() 2316 ) 2317 2318 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2319 catalog = None 2320 db = None 2321 table = self._parse_table_part(schema=schema) 2322 2323 while self._match(TokenType.DOT): 2324 if catalog: 2325 # This allows nesting the table in arbitrarily many dot expressions if needed 2326 table = self.expression( 2327 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2328 ) 2329 else: 2330 catalog = db 2331 db = table 2332 table = self._parse_table_part(schema=schema) 2333 2334 if not table: 2335 self.raise_error(f"Expected table name but got {self._curr}") 2336 2337 return self.expression( 2338 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2339 ) 2340 2341 def _parse_table( 2342 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2343 ) -> 
t.Optional[exp.Expression]: 2344 lateral = self._parse_lateral() 2345 if lateral: 2346 return lateral 2347 2348 unnest = self._parse_unnest() 2349 if unnest: 2350 return unnest 2351 2352 values = self._parse_derived_table_values() 2353 if values: 2354 return values 2355 2356 subquery = self._parse_select(table=True) 2357 if subquery: 2358 if not subquery.args.get("pivots"): 2359 subquery.set("pivots", self._parse_pivots()) 2360 return subquery 2361 2362 this: exp.Expression = self._parse_table_parts(schema=schema) 2363 2364 if schema: 2365 return self._parse_schema(this=this) 2366 2367 if self.ALIAS_POST_TABLESAMPLE: 2368 table_sample = self._parse_table_sample() 2369 2370 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2371 if alias: 2372 this.set("alias", alias) 2373 2374 if not this.args.get("pivots"): 2375 this.set("pivots", self._parse_pivots()) 2376 2377 this.set("hints", self._parse_table_hints()) 2378 2379 if not self.ALIAS_POST_TABLESAMPLE: 2380 table_sample = self._parse_table_sample() 2381 2382 if table_sample: 2383 table_sample.set("this", this) 2384 this = table_sample 2385 2386 return this 2387 2388 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2389 if not self._match(TokenType.UNNEST): 2390 return None 2391 2392 expressions = self._parse_wrapped_csv(self._parse_type) 2393 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2394 2395 alias = self._parse_table_alias() if with_alias else None 2396 2397 if alias and self.UNNEST_COLUMN_ONLY: 2398 if alias.args.get("columns"): 2399 self.raise_error("Unexpected extra column alias in unnest.") 2400 2401 alias.set("columns", [alias.this]) 2402 alias.set("this", None) 2403 2404 offset = None 2405 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2406 self._match(TokenType.ALIAS) 2407 offset = self._parse_id_var() or exp.to_identifier("offset") 2408 2409 return self.expression( 2410 exp.Unnest, expressions=expressions, 
ordinality=ordinality, alias=alias, offset=offset 2411 ) 2412 2413 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2414 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2415 if not is_derived and not self._match(TokenType.VALUES): 2416 return None 2417 2418 expressions = self._parse_csv(self._parse_value) 2419 alias = self._parse_table_alias() 2420 2421 if is_derived: 2422 self._match_r_paren() 2423 2424 return self.expression( 2425 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2426 ) 2427 2428 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2429 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2430 as_modifier and self._match_text_seq("USING", "SAMPLE") 2431 ): 2432 return None 2433 2434 bucket_numerator = None 2435 bucket_denominator = None 2436 bucket_field = None 2437 percent = None 2438 rows = None 2439 size = None 2440 seed = None 2441 2442 kind = ( 2443 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2444 ) 2445 method = self._parse_var(tokens=(TokenType.ROW,)) 2446 2447 self._match(TokenType.L_PAREN) 2448 2449 num = self._parse_number() 2450 2451 if self._match_text_seq("BUCKET"): 2452 bucket_numerator = self._parse_number() 2453 self._match_text_seq("OUT", "OF") 2454 bucket_denominator = bucket_denominator = self._parse_number() 2455 self._match(TokenType.ON) 2456 bucket_field = self._parse_field() 2457 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2458 percent = num 2459 elif self._match(TokenType.ROWS): 2460 rows = num 2461 else: 2462 size = num 2463 2464 self._match(TokenType.R_PAREN) 2465 2466 if self._match(TokenType.L_PAREN): 2467 method = self._parse_var() 2468 seed = self._match(TokenType.COMMA) and self._parse_number() 2469 self._match_r_paren() 2470 elif self._match_texts(("SEED", "REPEATABLE")): 2471 seed = self._parse_wrapped(self._parse_number) 2472 2473 return self.expression( 2474 
exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Collect consecutive PIVOT/UNPIVOT clauses until none remain."""
        # iter(callable, sentinel) keeps calling _parse_pivot until it returns None.
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement form."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse one ``PIVOT (... FOR ... IN (...))`` or UNPIVOT clause.

        Returns None (with the token cursor restored) if the clause does not
        start here; raises a parse error for a malformed clause body.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT was matched but no clause body follows; backtrack.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, 
expressions=expressions, field=field, unpivot=unpivot)

        # Only attach an alias if another PIVOT/UNPIVOT doesn't follow directly.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot produces, combining
            # each aggregation alias with each IN-list field value.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; dialects may override."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; ``skip_where_token`` assumes WHERE was consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and
        (ClickHouse-style) WITH TOTALS modifiers, accumulated per kind."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop so that mixed forms like `GROUP BY a, ROLLUP(b)` accumulate
        # into the same Group expression; stops once nothing new is matched.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; plain `ROLLUP (...)` stores columns.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse ``GROUPING SETS ( ... )``; None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None if HAVING is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None if QUALIFY is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause, returning ``this`` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for ORDER-BY-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When NULLS FIRST/LAST isn't explicit, derive the null ordering from
        # the dialect's NULL_ORDERING setting relative to ASC/DESC.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when ``top`` is set) or FETCH clause.

        Returns ``this`` unchanged if neither clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`: the first term is the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns ``this`` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses (FOR UPDATE / FOR SHARE / ...)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto ``this``."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Set operations are distinct unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return 
self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected expressions (precedence via CONJUNCTION map)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators, delegating operands to comparison parsing."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators, delegating operands to range parsing."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS [NOT] NULL/TRUE/FALSE,
        IS [NOT] DISTINCT FROM ...); backtracks and returns None on no match."""
        # index points at the IS token itself so we can retreat past it.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS DISTINCT FROM -> null-safe inequality; IS NOT DISTINCT FROM -> null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2834 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2835 unnest = self._parse_unnest(with_alias=False) 2836 if unnest: 2837 this = self.expression(exp.In, this=this, unnest=unnest) 2838 elif self._match(TokenType.L_PAREN): 2839 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2840 2841 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2842 this = self.expression(exp.In, this=this, query=expressions[0]) 2843 else: 2844 this = self.expression(exp.In, this=this, expressions=expressions) 2845 2846 self._match_r_paren(this) 2847 else: 2848 this = self.expression(exp.In, this=this, field=self._parse_field()) 2849 2850 return this 2851 2852 def _parse_between(self, this: exp.Expression) -> exp.Between: 2853 low = self._parse_bitwise() 2854 self._match(TokenType.AND) 2855 high = self._parse_bitwise() 2856 return self.expression(exp.Between, this=this, low=low, high=high) 2857 2858 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2859 if not self._match(TokenType.ESCAPE): 2860 return this 2861 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2862 2863 def _parse_interval(self) -> t.Optional[exp.Interval]: 2864 if not self._match(TokenType.INTERVAL): 2865 return None 2866 2867 this = self._parse_primary() or self._parse_term() 2868 unit = self._parse_function() or self._parse_var() 2869 2870 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2871 # each INTERVAL expression into this canonical form so it's easy to transpile 2872 if this and this.is_number: 2873 this = exp.Literal.string(this.name) 2874 elif this and this.is_string: 2875 parts = this.name.split() 2876 2877 if len(parts) == 2: 2878 if unit: 2879 # this is not actually a unit, it's something else 2880 unit = None 2881 self._retreat(self._index - 1) 2882 else: 2883 this = exp.Literal.string(parts[0]) 2884 unit = 
self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (per the TERM precedence map)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (per the FACTOR precedence map)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator if present, otherwise a typed/primary expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-like ``TYPE 'literal'`` form, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' -> a cast (or a dialect-specific literal parser).
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: it was a column after all.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/parametrized and array forms.

        ``check_func`` guards against misreading a function call as a type;
        on any mismatch the token cursor is retreated and None is returned.
        """
        index = self._index

        # Teradata-style SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters: struct fields, nested types,
            # enum values, or plain sizes like VARCHAR(10).
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # With parens it could still be a function call; checked further below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix, e.g. INT[] / INT[][] ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this wasn't a type after all.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = 
self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE variants to dedicated types.
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a following string literal: `TYPE(...) 'str'` keeps it a
            # type; otherwise treat the whole thing as a function call and bail.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct field: ``name [:] type`` plus column-def extras."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in AT TIME ZONE if the clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return 
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to exp.Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::casts, brackets) to ``this``."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast — the RHS must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with literal RHS (e.g. JSON path ops): consume the
                # next token and wrap it as a number or string literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
3103 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3104 this = self._replace_columns_with_dots(this) 3105 3106 if op: 3107 this = op(self, this, field) 3108 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3109 this = self.expression( 3110 exp.Column, 3111 this=field, 3112 table=this.this, 3113 db=this.args.get("table"), 3114 catalog=this.args.get("db"), 3115 ) 3116 else: 3117 this = self.expression(exp.Dot, this=this, expression=field) 3118 this = self._parse_bracket(this) 3119 return this 3120 3121 def _parse_primary(self) -> t.Optional[exp.Expression]: 3122 if self._match_set(self.PRIMARY_PARSERS): 3123 token_type = self._prev.token_type 3124 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3125 3126 if token_type == TokenType.STRING: 3127 expressions = [primary] 3128 while self._match(TokenType.STRING): 3129 expressions.append(exp.Literal.string(self._prev.text)) 3130 3131 if len(expressions) > 1: 3132 return self.expression(exp.Concat, expressions=expressions) 3133 3134 return primary 3135 3136 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3137 return exp.Literal.number(f"0.{self._prev.text}") 3138 3139 if self._match(TokenType.L_PAREN): 3140 comments = self._prev_comments 3141 query = self._parse_select() 3142 3143 if query: 3144 expressions = [query] 3145 else: 3146 expressions = self._parse_csv(self._parse_expression) 3147 3148 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3149 3150 if isinstance(this, exp.Subqueryable): 3151 this = self._parse_set_operations( 3152 self._parse_subquery(this=this, parse_alias=False) 3153 ) 3154 elif len(expressions) > 1: 3155 this = self.expression(exp.Tuple, expressions=expressions) 3156 else: 3157 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3158 3159 if this: 3160 this.add_comments(comments) 3161 3162 self._match_r_paren(expression=this) 3163 return this 3164 3165 return None 3166 3167 
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier.

        Args:
            any_token: passed through to `_parse_id_var` to accept any non-reserved token.
            tokens: optional token types that may serve as identifiers.
            anonymous_func: parse function calls as `exp.Anonymous` instead of typed nodes.
        """
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Handles, in order: no-paren function parsers, bare no-paren functions
        (e.g. CURRENT_DATE), special-cased function parsers, subquery predicates
        (EXISTS/ANY/...), and finally generic calls resolved via `functions`
        (defaults to self.FUNCTIONS) or wrapped as `exp.Anonymous`.

        Args:
            functions: name -> builder overrides for resolving known functions.
            anonymous: always build `exp.Anonymous`, skipping typed builders.
            optional_parens: allow function keywords that take no parentheses.

        Returns:
            The (possibly window-wrapped) function expression, or None if the
            upcoming tokens do not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # validate_expression reports arity/argument errors per error_level.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function/UDF definition as a column def."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parameter list.

        NOTE(review): `kind` is part of the overridable interface but unused in
        this base implementation — confirm against dialect subclasses.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a character-set introducer (e.g. `_utf8'abc'`); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda argument, e.g. `(x, y) -> x + y` or `x -> x`.

        If no lambda operator follows, the cursor is rewound and a DISTINCT
        list, a select, or a plain expression is parsed instead.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a lambda parameter list after all.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the LHS of `name = value` arguments as a variable, not a column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema — column defs and/or constraints — for `this`.

        First speculatively tries a nested select (e.g. CREATE TABLE t AS (SELECT ...));
        on failure the cursor and error list are reset and a schema is parsed.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Bare identifier with no type/constraints: return it unchanged.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT / IDENTITY(start, increment) style constraints.

        Only when both start and increment are present is a generated-identity
        constraint built; otherwise a plain AUTO_INCREMENT is returned.
        """
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a value list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY (options)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) — computed column, not identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT ... constraint (NULL or CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint, or None."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint, or fall back to an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the matched constraint keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
3468 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3469 self._match_text_seq("KEY") 3470 return self.expression( 3471 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3472 ) 3473 3474 def _parse_key_constraint_options(self) -> t.List[str]: 3475 options = [] 3476 while True: 3477 if not self._curr: 3478 break 3479 3480 if self._match(TokenType.ON): 3481 action = None 3482 on = self._advance_any() and self._prev.text 3483 3484 if self._match_text_seq("NO", "ACTION"): 3485 action = "NO ACTION" 3486 elif self._match_text_seq("CASCADE"): 3487 action = "CASCADE" 3488 elif self._match_pair(TokenType.SET, TokenType.NULL): 3489 action = "SET NULL" 3490 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3491 action = "SET DEFAULT" 3492 else: 3493 self.raise_error("Invalid key constraint") 3494 3495 options.append(f"ON {on} {action}") 3496 elif self._match_text_seq("NOT", "ENFORCED"): 3497 options.append("NOT ENFORCED") 3498 elif self._match_text_seq("DEFERRABLE"): 3499 options.append("DEFERRABLE") 3500 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3501 options.append("INITIALLY DEFERRED") 3502 elif self._match_text_seq("NORELY"): 3503 options.append("NORELY") 3504 elif self._match_text_seq("MATCH", "FULL"): 3505 options.append("MATCH FULL") 3506 else: 3507 break 3508 3509 return options 3510 3511 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3512 if match and not self._match(TokenType.REFERENCES): 3513 return None 3514 3515 expressions = None 3516 this = self._parse_id_var() 3517 3518 if self._match(TokenType.L_PAREN, advance=False): 3519 expressions = self._parse_wrapped_id_vars() 3520 3521 options = self._parse_key_constraint_options() 3522 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3523 3524 def _parse_foreign_key(self) -> exp.ForeignKey: 3525 expressions = self._parse_wrapped_id_vars() 3526 reference = self._parse_references() 3527 
options = {} 3528 3529 while self._match(TokenType.ON): 3530 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3531 self.raise_error("Expected DELETE or UPDATE") 3532 3533 kind = self._prev.text.lower() 3534 3535 if self._match_text_seq("NO", "ACTION"): 3536 action = "NO ACTION" 3537 elif self._match(TokenType.SET): 3538 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3539 action = "SET " + self._prev.text.upper() 3540 else: 3541 self._advance() 3542 action = self._prev.text.upper() 3543 3544 options[kind] = action 3545 3546 return self.expression( 3547 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3548 ) 3549 3550 def _parse_primary_key( 3551 self, wrapped_optional: bool = False, in_props: bool = False 3552 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3553 desc = ( 3554 self._match_set((TokenType.ASC, TokenType.DESC)) 3555 and self._prev.token_type == TokenType.DESC 3556 ) 3557 3558 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3559 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3560 3561 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3562 options = self._parse_key_constraint_options() 3563 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3564 3565 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3566 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3567 return this 3568 3569 bracket_kind = self._prev.token_type 3570 3571 if self._match(TokenType.COLON): 3572 expressions: t.List[t.Optional[exp.Expression]] = [ 3573 self.expression(exp.Slice, expression=self._parse_conjunction()) 3574 ] 3575 else: 3576 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3577 3578 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3579 if bracket_kind == TokenType.L_BRACE: 3580 this = 
self.expression(exp.Struct, expressions=expressions) 3581 elif not this or this.name.upper() == "ARRAY": 3582 this = self.expression(exp.Array, expressions=expressions) 3583 else: 3584 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3585 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3586 3587 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3588 self.raise_error("Expected ]") 3589 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3590 self.raise_error("Expected }") 3591 3592 self._add_comments(this) 3593 return self._parse_bracket(this) 3594 3595 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3596 if self._match(TokenType.COLON): 3597 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3598 return this 3599 3600 def _parse_case(self) -> t.Optional[exp.Expression]: 3601 ifs = [] 3602 default = None 3603 3604 expression = self._parse_conjunction() 3605 3606 while self._match(TokenType.WHEN): 3607 this = self._parse_conjunction() 3608 self._match(TokenType.THEN) 3609 then = self._parse_conjunction() 3610 ifs.append(self.expression(exp.If, this=this, true=then)) 3611 3612 if self._match(TokenType.ELSE): 3613 default = self._parse_conjunction() 3614 3615 if not self._match(TokenType.END): 3616 self.raise_error("Expected END after CASE", self._prev) 3617 3618 return self._parse_window( 3619 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3620 ) 3621 3622 def _parse_if(self) -> t.Optional[exp.Expression]: 3623 if self._match(TokenType.L_PAREN): 3624 args = self._parse_csv(self._parse_conjunction) 3625 this = self.validate_expression(exp.If.from_arg_list(args), args) 3626 self._match_r_paren() 3627 else: 3628 index = self._index - 1 3629 condition = self._parse_conjunction() 3630 3631 if not condition: 3632 self._retreat(index) 3633 return None 3634 3635 
self._match(TokenType.THEN) 3636 true = self._parse_conjunction() 3637 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3638 self._match(TokenType.END) 3639 this = self.expression(exp.If, this=condition, true=true, false=false) 3640 3641 return self._parse_window(this) 3642 3643 def _parse_extract(self) -> exp.Extract: 3644 this = self._parse_function() or self._parse_var() or self._parse_type() 3645 3646 if self._match(TokenType.FROM): 3647 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3648 3649 if not self._match(TokenType.COMMA): 3650 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3651 3652 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3653 3654 def _parse_cast(self, strict: bool) -> exp.Expression: 3655 this = self._parse_conjunction() 3656 3657 if not self._match(TokenType.ALIAS): 3658 if self._match(TokenType.COMMA): 3659 return self.expression( 3660 exp.CastToStrType, this=this, expression=self._parse_string() 3661 ) 3662 else: 3663 self.raise_error("Expected AS after CAST") 3664 3665 to = self._parse_types() 3666 3667 if not to: 3668 self.raise_error("Expected TYPE after CAST") 3669 elif to.this == exp.DataType.Type.CHAR: 3670 if self._match(TokenType.CHARACTER_SET): 3671 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3672 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3673 fmt = self._parse_string() 3674 3675 return self.expression( 3676 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3677 this=this, 3678 format=exp.Literal.string( 3679 format_time( 3680 fmt.this if fmt else "", 3681 self.FORMAT_MAPPING or self.TIME_MAPPING, 3682 self.FORMAT_TRIE or self.TIME_TRIE, 3683 ) 3684 ), 3685 ) 3686 3687 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3688 3689 def _parse_concat(self) -> t.Optional[exp.Expression]: 3690 args = 
self._parse_csv(self._parse_conjunction) 3691 if self.CONCAT_NULL_OUTPUTS_STRING: 3692 args = [ 3693 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3694 for arg in args 3695 if arg 3696 ] 3697 3698 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3699 # we find such a call we replace it with its argument. 3700 if len(args) == 1: 3701 return args[0] 3702 3703 return self.expression( 3704 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3705 ) 3706 3707 def _parse_string_agg(self) -> exp.Expression: 3708 expression: t.Optional[exp.Expression] 3709 3710 if self._match(TokenType.DISTINCT): 3711 args = self._parse_csv(self._parse_conjunction) 3712 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3713 else: 3714 args = self._parse_csv(self._parse_conjunction) 3715 expression = seq_get(args, 0) 3716 3717 index = self._index 3718 if not self._match(TokenType.R_PAREN): 3719 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3720 order = self._parse_order(this=expression) 3721 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3722 3723 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3724 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3725 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3726 if not self._match_text_seq("WITHIN", "GROUP"): 3727 self._retreat(index) 3728 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3729 3730 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3731 order = self._parse_order(this=expression) 3732 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3733 3734 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3735 to: t.Optional[exp.Expression] 3736 this = self._parse_bitwise() 3737 3738 if self._match(TokenType.USING): 3739 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3740 elif self._match(TokenType.COMMA): 3741 to = self._parse_bitwise() 3742 else: 3743 to = None 3744 3745 # Swap the argument order if needed to produce the correct AST 3746 if self.CONVERT_TYPE_FIRST: 3747 this, to = to, this 3748 3749 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3750 3751 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3752 """ 3753 There are generally two variants of the DECODE function: 3754 3755 - DECODE(bin, charset) 3756 - DECODE(expression, search, result [, search, result] ... [, default]) 3757 3758 The second variant will always be parsed into a CASE expression. Note that NULL 3759 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3760 instead of relying on pattern matching. 
3761 """ 3762 args = self._parse_csv(self._parse_conjunction) 3763 3764 if len(args) < 3: 3765 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3766 3767 expression, *expressions = args 3768 if not expression: 3769 return None 3770 3771 ifs = [] 3772 for search, result in zip(expressions[::2], expressions[1::2]): 3773 if not search or not result: 3774 return None 3775 3776 if isinstance(search, exp.Literal): 3777 ifs.append( 3778 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3779 ) 3780 elif isinstance(search, exp.Null): 3781 ifs.append( 3782 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3783 ) 3784 else: 3785 cond = exp.or_( 3786 exp.EQ(this=expression.copy(), expression=search), 3787 exp.and_( 3788 exp.Is(this=expression.copy(), expression=exp.Null()), 3789 exp.Is(this=search.copy(), expression=exp.Null()), 3790 copy=False, 3791 ), 3792 copy=False, 3793 ) 3794 ifs.append(exp.If(this=cond, true=result)) 3795 3796 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3797 3798 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3799 self._match_text_seq("KEY") 3800 key = self._parse_field() 3801 self._match(TokenType.COLON) 3802 self._match_text_seq("VALUE") 3803 value = self._parse_field() 3804 3805 if not key and not value: 3806 return None 3807 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3808 3809 def _parse_json_object(self) -> exp.JSONObject: 3810 star = self._parse_star() 3811 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3812 3813 null_handling = None 3814 if self._match_text_seq("NULL", "ON", "NULL"): 3815 null_handling = "NULL ON NULL" 3816 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3817 null_handling = "ABSENT ON NULL" 3818 3819 unique_keys = None 3820 if self._match_text_seq("WITH", "UNIQUE"): 3821 unique_keys = True 3822 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3823 unique_keys = False 3824 3825 self._match_text_seq("KEYS") 3826 3827 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3828 format_json = self._match_text_seq("FORMAT", "JSON") 3829 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3830 3831 return self.expression( 3832 exp.JSONObject, 3833 expressions=expressions, 3834 null_handling=null_handling, 3835 unique_keys=unique_keys, 3836 return_type=return_type, 3837 format_json=format_json, 3838 encoding=encoding, 3839 ) 3840 3841 def _parse_logarithm(self) -> exp.Func: 3842 # Default argument order is base, expression 3843 args = self._parse_csv(self._parse_range) 3844 3845 if len(args) > 1: 3846 if not self.LOG_BASE_FIRST: 3847 args.reverse() 3848 return exp.Log.from_arg_list(args) 3849 3850 return self.expression( 3851 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3852 ) 3853 3854 def _parse_match_against(self) -> exp.MatchAgainst: 3855 expressions = self._parse_csv(self._parse_column) 3856 3857 self._match_text_seq(")", "AGAINST", "(") 3858 3859 this = self._parse_string() 3860 3861 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3862 modifier = "IN NATURAL LANGUAGE MODE" 3863 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3864 modifier = f"{modifier} WITH QUERY EXPANSION" 3865 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3866 modifier = "IN BOOLEAN MODE" 3867 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3868 modifier = "WITH QUERY EXPANSION" 3869 else: 3870 modifier = None 3871 3872 return self.expression( 3873 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3874 ) 3875 3876 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3877 def _parse_open_json(self) -> exp.OpenJSON: 3878 this = self._parse_bitwise() 3879 path = self._match(TokenType.COMMA) and self._parse_string() 3880 3881 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3882 this = self._parse_field(any_token=True) 3883 kind = self._parse_types() 3884 path = self._parse_string() 3885 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3886 3887 return self.expression( 3888 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3889 ) 3890 3891 expressions = None 3892 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3893 self._match_l_paren() 3894 expressions = self._parse_csv(_parse_open_json_column_def) 3895 3896 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3897 3898 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3899 args = self._parse_csv(self._parse_bitwise) 3900 3901 if self._match(TokenType.IN): 3902 return self.expression( 3903 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3904 ) 3905 3906 if haystack_first: 3907 haystack = seq_get(args, 0) 3908 needle = seq_get(args, 1) 3909 else: 3910 needle = seq_get(args, 0) 3911 haystack = seq_get(args, 1) 3912 3913 return self.expression( 3914 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3915 ) 3916 3917 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3918 args = self._parse_csv(self._parse_table) 3919 return exp.JoinHint(this=func_name.upper(), expressions=args) 3920 3921 def _parse_substring(self) -> exp.Substring: 3922 # Postgres supports the form: substring(string [from int] [for int]) 3923 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3924 3925 args = self._parse_csv(self._parse_bitwise) 3926 3927 if self._match(TokenType.FROM): 3928 args.append(self._parse_bitwise()) 3929 if self._match(TokenType.FOR): 3930 args.append(self._parse_bitwise()) 3931 3932 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3933 3934 def _parse_trim(self) -> exp.Trim: 3935 # https://www.w3resource.com/sql/character-functions/trim.php 3936 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3937 3938 position = None 3939 collation = None 3940 3941 if self._match_texts(self.TRIM_TYPES): 3942 position = self._prev.text.upper() 3943 3944 expression = self._parse_bitwise() 3945 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3946 this = self._parse_bitwise() 3947 else: 3948 this = expression 3949 expression = None 3950 3951 if self._match(TokenType.COLLATE): 3952 collation = self._parse_bitwise() 3953 3954 return self.expression( 3955 exp.Trim, this=this, position=position, expression=expression, collation=collation 3956 ) 3957 3958 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3959 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3960 3961 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3962 return self._parse_window(self._parse_id_var(), alias=True) 3963 3964 def _parse_respect_or_ignore_nulls( 3965 self, this: t.Optional[exp.Expression] 3966 ) -> t.Optional[exp.Expression]: 3967 if self._match_text_seq("IGNORE", "NULLS"): 3968 return self.expression(exp.IgnoreNulls, this=this) 3969 if self._match_text_seq("RESPECT", "NULLS"): 3970 return self.expression(exp.RespectNulls, this=this) 3971 return this 3972 3973 def _parse_window( 3974 self, this: t.Optional[exp.Expression], alias: bool = False 3975 ) -> t.Optional[exp.Expression]: 3976 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3977 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3978 self._match_r_paren() 3979 3980 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3981 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3982 if self._match_text_seq("WITHIN", "GROUP"): 3983 order = self._parse_wrapped(self._parse_order) 3984 this = self.expression(exp.WithinGroup, this=this, expression=order) 3985 3986 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3987 # Some dialects choose to implement and some do not. 3988 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3989 3990 # There is some code above in _parse_lambda that handles 3991 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3992 3993 # The below changes handle 3994 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3995 3996 # Oracle allows both formats 3997 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3998 # and Snowflake chose to do the same for familiarity 3999 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4000 this = self._parse_respect_or_ignore_nulls(this) 4001 4002 # bigquery select from window x AS (partition by ...) 
4003 if alias: 4004 over = None 4005 self._match(TokenType.ALIAS) 4006 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4007 return this 4008 else: 4009 over = self._prev.text.upper() 4010 4011 if not self._match(TokenType.L_PAREN): 4012 return self.expression( 4013 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4014 ) 4015 4016 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4017 4018 first = self._match(TokenType.FIRST) 4019 if self._match_text_seq("LAST"): 4020 first = False 4021 4022 partition = self._parse_partition_by() 4023 order = self._parse_order() 4024 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4025 4026 if kind: 4027 self._match(TokenType.BETWEEN) 4028 start = self._parse_window_spec() 4029 self._match(TokenType.AND) 4030 end = self._parse_window_spec() 4031 4032 spec = self.expression( 4033 exp.WindowSpec, 4034 kind=kind, 4035 start=start["value"], 4036 start_side=start["side"], 4037 end=end["value"], 4038 end_side=end["side"], 4039 ) 4040 else: 4041 spec = None 4042 4043 self._match_r_paren() 4044 4045 return self.expression( 4046 exp.Window, 4047 this=this, 4048 partition_by=partition, 4049 order=order, 4050 spec=spec, 4051 alias=window_alias, 4052 over=over, 4053 first=first, 4054 ) 4055 4056 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4057 self._match(TokenType.BETWEEN) 4058 4059 return { 4060 "value": ( 4061 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4062 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4063 or self._parse_bitwise() 4064 ), 4065 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4066 } 4067 4068 def _parse_alias( 4069 self, this: t.Optional[exp.Expression], explicit: bool = False 4070 ) -> t.Optional[exp.Expression]: 4071 any_token = self._match(TokenType.ALIAS) 4072 4073 if explicit and not any_token: 4074 return this 4075 4076 if 
self._match(TokenType.L_PAREN): 4077 aliases = self.expression( 4078 exp.Aliases, 4079 this=this, 4080 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4081 ) 4082 self._match_r_paren(aliases) 4083 return aliases 4084 4085 alias = self._parse_id_var(any_token) 4086 4087 if alias: 4088 return self.expression(exp.Alias, this=this, alias=alias) 4089 4090 return this 4091 4092 def _parse_id_var( 4093 self, 4094 any_token: bool = True, 4095 tokens: t.Optional[t.Collection[TokenType]] = None, 4096 ) -> t.Optional[exp.Expression]: 4097 identifier = self._parse_identifier() 4098 4099 if identifier: 4100 return identifier 4101 4102 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4103 quoted = self._prev.token_type == TokenType.STRING 4104 return exp.Identifier(this=self._prev.text, quoted=quoted) 4105 4106 return None 4107 4108 def _parse_string(self) -> t.Optional[exp.Expression]: 4109 if self._match(TokenType.STRING): 4110 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4111 return self._parse_placeholder() 4112 4113 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4114 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4115 4116 def _parse_number(self) -> t.Optional[exp.Expression]: 4117 if self._match(TokenType.NUMBER): 4118 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4119 return self._parse_placeholder() 4120 4121 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4122 if self._match(TokenType.IDENTIFIER): 4123 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4124 return self._parse_placeholder() 4125 4126 def _parse_var( 4127 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4128 ) -> t.Optional[exp.Expression]: 4129 if ( 4130 (any_token and self._advance_any()) 4131 or self._match(TokenType.VAR) 4132 or (self._match_set(tokens) if tokens else False) 
def _advance_any(self) -> t.Optional[Token]:
    """Consume and return the current token unless it is a reserved keyword."""
    if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
        self._advance()
        return self._prev
    return None

def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
    return self._parse_var() or self._parse_string()

def _parse_null(self) -> t.Optional[exp.Expression]:
    if self._match(TokenType.NULL):
        return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
    return None

def _parse_boolean(self) -> t.Optional[exp.Expression]:
    if self._match(TokenType.TRUE):
        return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
    if self._match(TokenType.FALSE):
        return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
    return None

def _parse_star(self) -> t.Optional[exp.Expression]:
    if self._match(TokenType.STAR):
        return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
    return None

def _parse_parameter(self) -> exp.Parameter:
    """Parse a parameter reference, optionally wrapped in braces."""
    wrapped = self._match(TokenType.L_BRACE)
    this = self._parse_var() or self._parse_identifier() or self._parse_primary()
    self._match(TokenType.R_BRACE)
    return self.expression(exp.Parameter, this=this, wrapped=wrapped)

def _parse_placeholder(self) -> t.Optional[exp.Expression]:
    if self._match_set(self.PLACEHOLDER_PARSERS):
        placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
        if placeholder:
            return placeholder
        # The sub-parser declined: put the consumed token back.
        self._advance(-1)
    return None

def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parse an EXCEPT column-exclusion list, parenthesized or bare."""
    if not self._match(TokenType.EXCEPT):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_column)
    return self._parse_csv(self._parse_column)

def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parse a REPLACE projection list, parenthesized or bare."""
    if not self._match(TokenType.REPLACE):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_expression)
    return self._parse_csv(self._parse_expression)
def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[t.Optional[exp.Expression]]:
    """Parse a separator-delimited list, using `parse_method` for each item."""
    parse_result = parse_method()
    items = [parse_result] if parse_result is not None else []

    while self._match(sep):
        # Attach comments trailing the separator to the preceding item.
        self._add_comments(parse_result)
        parse_result = parse_method()
        if parse_result is not None:
            items.append(parse_result)

    return items

def _parse_tokens(
    self, parse_method: t.Callable, expressions: t.Dict
) -> t.Optional[exp.Expression]:
    """Parse a left-associative chain of binary operators from `expressions`."""
    this = parse_method()

    while self._match_set(expressions):
        this = self.expression(
            expressions[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )

    return this

def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
    return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

def _parse_wrapped_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
) -> t.List[t.Optional[exp.Expression]]:
    return self._parse_wrapped(
        lambda: self._parse_csv(parse_method, sep=sep), optional=optional
    )

def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
    """Parse a parenthesized production; parens are required unless `optional`."""
    wrapped = self._match(TokenType.L_PAREN)
    if not wrapped and not optional:
        self.raise_error("Expecting (")
    parse_result = parse_method()
    if wrapped:
        self._match_r_paren()
    return parse_result

def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    return self._parse_select() or self._parse_set_operations(
        self._parse_expression() if alias else self._parse_conjunction()
    )
def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction:
    """Parse BEGIN/START [TRANSACTION | WORK] with optional transaction modes."""
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts({"TRANSACTION", "WORK"})

    modes = []
    while True:
        mode = []
        # A single mode may span several VAR tokens (e.g. READ ONLY).
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
    chain = None
    savepoint = None
    # The COMMIT/ROLLBACK keyword was already consumed by the caller.
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts({"TRANSACTION", "WORK"})

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def>."""
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_column_def(self._parse_field(any_token=True))

    if expression:
        expression.set("exists", exists_column)

    # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
    # NOTE(review): this branch calls expression.set without the `if expression`
    # guard used above — verify `expression` cannot be None when FIRST/AFTER follows.
    if self._match_texts(("FIRST", "AFTER")):
        position = self._prev.text
        column_position = self.expression(
            exp.ColumnPosition, this=self._parse_column(), position=position
        )
        expression.set("position", column_position)

    return expression
def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse DROP [COLUMN] inside ALTER TABLE, defaulting kind to COLUMN."""
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_add_constraint(self) -> exp.AddConstraint:
    """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

    The caller has already consumed the introducing token; its type is
    available as self._prev.token_type.
    """
    this = None
    kind = self._prev.token_type

    if kind == TokenType.CONSTRAINT:
        this = self._parse_id_var()

        if self._match_text_seq("CHECK"):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

    if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
        expression = self._parse_foreign_key()
    elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
        expression = self._parse_primary_key()
    else:
        expression = None

    return self.expression(exp.AddConstraint, this=this, expression=expression)

def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS):
        return self._parse_csv(self._parse_add_constraint)

    # Not a constraint: rewind and try column definitions instead.
    self._retreat(index)
    return self._parse_csv(self._parse_add_column)

def _parse_alter_table_alter(self) -> exp.AlterColumn:
    """Parse ALTER [COLUMN] <name> DROP/SET DEFAULT or SET DATA TYPE ..."""
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

    self._match_text_seq("SET", "DATA")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._match_text_seq("TYPE") and self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_conjunction(),
    )
def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    # Not a partition drop: rewind and parse column drops instead.
    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> exp.RenameTable:
    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse ALTER TABLE; falls back to a raw Command when unsupported."""
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    this = self._parse_table(schema=True)

    if self._next:
        self._advance()
    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

    if parser:
        actions = ensure_list(parser(self))

        # Only build an AlterTable node when every token was consumed.
        if not self._curr:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
            )
    return self._parse_as_command(start)

def _parse_merge(self) -> exp.Merge:
    """Parse MERGE INTO ... USING ... ON ... with its WHEN clauses."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    whens = []
    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # source is False for BY TARGET, True for BY SOURCE, False when absent.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match(TokenType.VALUES) and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=whens,
    )
def _parse_show(self) -> t.Optional[exp.Expression]:
    """Parse SHOW via a dialect-specific sub-parser when one matches."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    self._advance()
    return self.expression(exp.Show, this=self._prev.text.upper())

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse a SET item of the form <name> = <value> or <name> TO <value>.

    Args:
        kind: An optional qualifier such as GLOBAL or SESSION.
    """
    index = self._index

    if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()

    if not self._match_texts(("=", "TO")):
        # Not an assignment: rewind so the caller can try something else.
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    this = self.expression(exp.EQ, this=left, expression=right)

    return self.expression(exp.SetItem, this=this, kind=kind)
def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse SET [GLOBAL] TRANSACTION <characteristic, ...>."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self) -> exp.Set | exp.Command:
    """Parse SET; falls back to a raw Command when tokens remain unparsed."""
    index = self._index
    set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
    """Match one of the given (possibly multi-word) options as a Var."""
    for option in options:
        if self._match_text_seq(*option.split(" ")):
            return exp.var(option)
    return None

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and wrap the raw SQL in a Command node."""
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    # The first token's text becomes the command name; the rest its payload.
    size = len(start.text)
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse a dictionary property of the form NAME(KIND(key value, ...))."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value can be parsed.
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )
def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse a dictionary range of the form ([MIN <expr>] MAX <expr>).

    Args:
        this: The property name the range belongs to.

    Returns:
        A DictRange expression; MIN defaults to literal 0 when omitted.
    """
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    # Locals renamed from `min`/`max` so the builtins aren't shadowed;
    # the DictRange arg names are unchanged.
    if has_min:
        min_value = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max_value = self._parse_var() or self._parse_primary()
    else:
        max_value = self._parse_var() or self._parse_primary()
        min_value = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min_value, max=max_value)

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Find the sub-parser keyed by the longest matching keyword sequence.

    Walks `trie` token by token; the cursor is restored when no key matches.

    Args:
        parsers: Mapping from keyword sequence (space-joined) to sub-parser.
        trie: A trie built over the keys of `parsers`.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    while True:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """Consume the current token if it has `token_type`; True on match, else None."""
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    """Consume the current token if its type is in `types`."""
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None
def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Consume the next two tokens if they match the given types in order."""
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    # Records or raises a parse error when the paren is missing.
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Consume the current token if its upper-cased text is in `texts`."""
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return False

def _match_text_seq(self, *texts, advance=True):
    """Consume a sequence of tokens matching `texts`; rewinds fully on failure."""
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return False

    # Peek-only mode: report the match but leave the cursor untouched.
    if not advance:
        self._retreat(index)

    return True

@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
    ...

@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    ...
def _replace_columns_with_dots(self, this):
    """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
    if isinstance(this, exp.Dot):
        exp.replace_children(this, self._replace_columns_with_dots)
    elif isinstance(this, exp.Column):
        exp.replace_children(this, self._replace_columns_with_dots)
        table = this.args.get("table")
        this = (
            self.expression(exp.Dot, this=table, expression=this.this)
            if table
            else self.expression(exp.Var, this=this.name)
        )
    elif isinstance(this, exp.Identifier):
        this = self.expression(exp.Var, this=this.name)

    return this

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """Replace column references to lambda parameters inside a lambda body.

    Args:
        node: The lambda body to rewrite.
        lambda_variables: The names bound by the enclosing lambda.
    """
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name in lambda_variables:
            dot_or_id = column.to_dot() if column.table else column.this
            parent = column.parent

            # Replace the outermost enclosing Dot chain, if any.
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                # No enclosing Dot: replace the column itself (or the root node).
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Args:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    """
    Args:
        error_level: The desired error level (defaults to ErrorLevel.IMMEDIATE).
        error_message_context: Number of characters of query context captured
            when displaying an error message.
        max_errors: Maximum number of errors included in a raised ParseError
            (only relevant when error_level is ErrorLevel.RAISE).
    """
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    # Initializes the mutable per-parse state (sql, errors, tokens, cursor).
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parse a token list, producing one syntax tree per SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Resolve the statement parser on the class so subclass overrides apply.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Args:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If no parser is registered for a requested expression type.
        ParseError: If parsing failed for every requested expression type.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Every candidate type failed: surface all errors, chained to the last one.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Args:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        for err in self.errors:
            logger.error(str(err))
        return

    if level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.

    Args:
        message: The error description.
        token: The token to anchor the error to (defaults to the current position).
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    # The \033[4m / \033[0m escapes underline the offending span in terminals.
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    # Attach explicit comments, or fall back to any pending parser comments.
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Args:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for message in expression.error_messages(args):
        self.raise_error(message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Args:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.