sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: 
self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN 
KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 741 TokenType.WHERE: lambda self: ("where", self._parse_where()), 742 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 743 TokenType.HAVING: lambda self: ("having", self._parse_having()), 744 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 745 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 746 
TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 747 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 748 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 749 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 750 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 751 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 752 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 753 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.CLUSTER_BY: lambda self: ( 755 "cluster", 756 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 757 ), 758 TokenType.DISTRIBUTE_BY: lambda self: ( 759 "distribute", 760 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 761 ), 762 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 763 } 764 765 SET_PARSERS = { 766 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 767 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 768 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 769 "TRANSACTION": lambda self: self._parse_set_transaction(), 770 } 771 772 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 773 774 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 775 776 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 777 778 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 779 780 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 781 782 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 783 TRANSACTION_CHARACTERISTICS = { 784 "ISOLATION LEVEL REPEATABLE READ", 785 "ISOLATION LEVEL READ COMMITTED", 786 "ISOLATION LEVEL READ UNCOMMITTED", 787 "ISOLATION LEVEL SERIALIZABLE", 788 "READ WRITE", 789 "READ ONLY", 790 } 791 792 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 793 794 CLONE_KINDS = 
{"TIMESTAMP", "OFFSET", "STATEMENT"} 795 796 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 797 798 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 799 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 800 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 801 802 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 803 804 STRICT_CAST = True 805 806 # A NULL arg in CONCAT yields NULL by default 807 CONCAT_NULL_OUTPUTS_STRING = False 808 809 PREFIXED_PIVOT_COLUMNS = False 810 IDENTIFY_PIVOT_STRINGS = False 811 812 LOG_BASE_FIRST = True 813 LOG_DEFAULTS_TO_LN = False 814 815 __slots__ = ( 816 "error_level", 817 "error_message_context", 818 "max_errors", 819 "sql", 820 "errors", 821 "_tokens", 822 "_index", 823 "_curr", 824 "_next", 825 "_prev", 826 "_prev_comments", 827 ) 828 829 # Autofilled 830 INDEX_OFFSET: int = 0 831 UNNEST_COLUMN_ONLY: bool = False 832 ALIAS_POST_TABLESAMPLE: bool = False 833 STRICT_STRING_CONCAT = False 834 NULL_ORDERING: str = "nulls_are_small" 835 SHOW_TRIE: t.Dict = {} 836 SET_TRIE: t.Dict = {} 837 FORMAT_MAPPING: t.Dict[str, str] = {} 838 FORMAT_TRIE: t.Dict = {} 839 TIME_MAPPING: t.Dict[str, str] = {} 840 TIME_TRIE: t.Dict = {} 841 842 def __init__( 843 self, 844 error_level: t.Optional[ErrorLevel] = None, 845 error_message_context: int = 100, 846 max_errors: int = 3, 847 ): 848 self.error_level = error_level or ErrorLevel.IMMEDIATE 849 self.error_message_context = error_message_context 850 self.max_errors = max_errors 851 self.reset() 852 853 def reset(self): 854 self.sql = "" 855 self.errors = [] 856 self._tokens = [] 857 self._index = 0 858 self._curr = None 859 self._next = None 860 self._prev = None 861 self._prev_comments = None 862 863 def parse( 864 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 865 ) -> t.List[t.Optional[exp.Expression]]: 866 """ 867 Parses a list of tokens and returns a list of syntax trees, one tree 868 per parsed SQL statement. 
869 870 Args: 871 raw_tokens: The list of tokens. 872 sql: The original SQL string, used to produce helpful debug messages. 873 874 Returns: 875 The list of the produced syntax trees. 876 """ 877 return self._parse( 878 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 879 ) 880 881 def parse_into( 882 self, 883 expression_types: exp.IntoType, 884 raw_tokens: t.List[Token], 885 sql: t.Optional[str] = None, 886 ) -> t.List[t.Optional[exp.Expression]]: 887 """ 888 Parses a list of tokens into a given Expression type. If a collection of Expression 889 types is given instead, this method will try to parse the token list into each one 890 of them, stopping at the first for which the parsing succeeds. 891 892 Args: 893 expression_types: The expression type(s) to try and parse the token list into. 894 raw_tokens: The list of tokens. 895 sql: The original SQL string, used to produce helpful debug messages. 896 897 Returns: 898 The target Expression. 899 """ 900 errors = [] 901 for expression_type in ensure_list(expression_types): 902 parser = self.EXPRESSION_PARSERS.get(expression_type) 903 if not parser: 904 raise TypeError(f"No parser registered for {expression_type}") 905 906 try: 907 return self._parse(parser, raw_tokens, sql) 908 except ParseError as e: 909 e.errors[0]["into_expression"] = expression_type 910 errors.append(e) 911 912 raise ParseError( 913 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 914 errors=merge_errors(errors), 915 ) from errors[-1] 916 917 def _parse( 918 self, 919 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 920 raw_tokens: t.List[Token], 921 sql: t.Optional[str] = None, 922 ) -> t.List[t.Optional[exp.Expression]]: 923 self.reset() 924 self.sql = sql or "" 925 926 total = len(raw_tokens) 927 chunks: t.List[t.List[Token]] = [[]] 928 929 for i, token in enumerate(raw_tokens): 930 if token.token_type == TokenType.SEMICOLON: 931 if i < total - 1: 932 chunks.append([]) 933 else: 934 
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token the error is anchored to; defaults to the current token,
                then the previous one, then an empty token if the stream is exhausted.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Capture up to `error_message_context` characters of SQL on each side of the
        # offending token, and underline the token itself with ANSI escapes.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # When no explicit comments are supplied, attach any comments buffered from the
        # previously consumed token so they survive into the AST.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches the buffered comments of the previous token to `expression`, then clears them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL text spanning the `start` and `end` tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor `times` tokens forward, updating _curr/_next/_prev bookkeeping."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to the absolute token position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps the previous token plus an optional trailing string into a generic Command node."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """
        Parses `COMMENT [IF EXISTS] ON <kind> <target> IS <string>`.

        Unknown target kinds fall back to parsing the statement as a raw command.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a table reference into a ToTableProperty node."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL expression may carry an action: DELETE, RECOMPRESS, or a move
            # to a named disk/volume.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """
        Parses a single statement: a registered statement parser, a raw command,
        or a bare expression / SELECT with query modifiers.
        """
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses a DROP statement; unknown creatable kinds fall back to a raw command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches `IF [NOT] EXISTS`, returning a truthy value only if the full sequence matched."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parses a CREATE [OR REPLACE] statement for functions, procedures, indexes,
        and DB creatables (tables, views, etc.), collecting properties from every
        location they may legally appear in.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear in several locations; merge them all into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a pre-name property, forwarding matched modifier flags to the property parser."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the flags that actually matched; a TypeError means the target
                # parser does not accept one of them.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property: a registered parser, a known multi-word form, or `key = value`."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses `STORED AS ...`, including the Hive INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses an optional `=`/`AS` followed by a field, wrapped in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """
        Parses consecutive properties into a single Properties node.

        Args:
            before: When set, use the pre-name property grammar (`_parse_property_before`).

        Returns:
            A Properties node, or None if no property was found.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield multiple properties.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses `[NO] FALLBACK [PROTECTION]`."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """
        Disambiguates VOLATILE: after a CREATE-like token it's a table property,
        otherwise it's a function stability marker.
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the various `WITH ...` property forms (parenthesized list, JOURNAL, DATA, isolated loading)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses `DEFINER = user@host`; returns None unless both user and host are present."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses `WITH JOURNAL [TABLE] [=] <table>`."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses `[NO] LOG`."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps pre-matched journal modifier flags into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses `CHECKSUM [=] ON|OFF|DEFAULT`."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a CLUSTER BY list of ordered expressions."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses `CLUSTERED BY (cols) [SORTED BY (...)] INTO n BUCKETS`."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parses `COPY GRANTS`; backtracks one token if GRANTS does not follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses `FREESPACE [=] n [PERCENT]`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses `MERGEBLOCKRATIO [= n [PERCENT]]`, or the NO/DEFAULT flag forms."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses `DATABLOCKSIZE [=] n [BYTES|KBYTES|KILOBYTES]` with optional pre-matched flags."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses `BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]`."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: target kind/name, FOR/IN, lock type, and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) take a name after the kind.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses a `PARTITION BY` expression list; returns an empty list if absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses `PARTITIONED BY [=] (schema | bracketed field)`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses `WITH [NO] DATA [AND [NO] STATISTICS]`."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses `NO PRIMARY INDEX` (the NO was consumed by the caller)."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parses `ON COMMIT PRESERVE|DELETE ROWS` (the ON was consumed by the caller)."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parses `DISTKEY (identifier)`."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses `LIKE <table> [INCLUDING|EXCLUDING <option>]...`."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parses `[COMPOUND] SORTKEY (id, ...)`."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parses `[DEFAULT] CHARACTER SET [=] <value>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: a scalar type, or `TABLE`/`TABLE<...>` for table functions."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parses `DESCRIBE [<kind>] <table>`."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement, including Hive `INSERT ... DIRECTORY` targets."""
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE (dialect-specific alternatives).
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses `ON CONFLICT ...` (Postgres-style) or `ON DUPLICATE KEY ...` (MySQL-style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parses `RETURNING <columns>` if present."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses the `FORMAT ...` part of a ROW FORMAT clause (ROW already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses Hive `ROW FORMAT SERDE '...'` or `ROW FORMAT DELIMITED ...` clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses Hive `LOAD DATA [LOCAL] INPATH ...`; anything else becomes a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parses a DELETE statement, including MySQL's multiple-table form."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET assignments, and the
        optional FROM / WHERE / RETURNING / LIMIT clauses."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                # SET is required for the assignments; `and` short-circuits to
                # falsy (no expressions) when the keyword is absent.
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>.

        Raises:
            ParseError: via `raise_error` when the TABLE keyword is missing
                (subject to the configured error level).
        """
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('key' = 'value')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        # OPTIONS is parsed as a single key/value pair wrapped in parens.
        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a PARTITION (<expr>, ...) clause, or return None if absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse a single row of a VALUES clause into a Tuple expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1896 # https://prestodb.io/docs/current/sql/values.html 1897 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1898 1899 def _parse_select( 1900 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1901 ) -> t.Optional[exp.Expression]: 1902 cte = self._parse_with() 1903 if cte: 1904 this = self._parse_statement() 1905 1906 if not this: 1907 self.raise_error("Failed to parse any statement following CTE") 1908 return cte 1909 1910 if "with" in this.arg_types: 1911 this.set("with", cte) 1912 else: 1913 self.raise_error(f"{this.key} does not support CTE") 1914 this = cte 1915 elif self._match(TokenType.SELECT): 1916 comments = self._prev_comments 1917 1918 hint = self._parse_hint() 1919 all_ = self._match(TokenType.ALL) 1920 distinct = self._match(TokenType.DISTINCT) 1921 1922 kind = ( 1923 self._match(TokenType.ALIAS) 1924 and self._match_texts(("STRUCT", "VALUE")) 1925 and self._prev.text 1926 ) 1927 1928 if distinct: 1929 distinct = self.expression( 1930 exp.Distinct, 1931 on=self._parse_value() if self._match(TokenType.ON) else None, 1932 ) 1933 1934 if all_ and distinct: 1935 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1936 1937 limit = self._parse_limit(top=True) 1938 expressions = self._parse_expressions() 1939 1940 this = self.expression( 1941 exp.Select, 1942 kind=kind, 1943 hint=hint, 1944 distinct=distinct, 1945 expressions=expressions, 1946 limit=limit, 1947 ) 1948 this.comments = comments 1949 1950 into = self._parse_into() 1951 if into: 1952 this.set("into", into) 1953 1954 from_ = self._parse_from() 1955 if from_: 1956 this.set("from", from_) 1957 1958 this = self._parse_query_modifiers(this) 1959 elif (table or nested) and self._match(TokenType.L_PAREN): 1960 if self._match(TokenType.PIVOT): 1961 this = self._parse_simplified_pivot() 1962 elif self._match(TokenType.FROM): 1963 this = exp.select("*").from_( 1964 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1965 ) 
1966 else: 1967 this = self._parse_table() if table else self._parse_select(nested=True) 1968 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1969 1970 self._match_r_paren() 1971 1972 # early return so that subquery unions aren't parsed again 1973 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1974 # Union ALL should be a property of the top select node, not the subquery 1975 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1976 elif self._match(TokenType.VALUES): 1977 this = self.expression( 1978 exp.Values, 1979 expressions=self._parse_csv(self._parse_value), 1980 alias=self._parse_table_alias(), 1981 ) 1982 else: 1983 this = None 1984 1985 return self._parse_set_operations(this) 1986 1987 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1988 if not skip_with_token and not self._match(TokenType.WITH): 1989 return None 1990 1991 comments = self._prev_comments 1992 recursive = self._match(TokenType.RECURSIVE) 1993 1994 expressions = [] 1995 while True: 1996 expressions.append(self._parse_cte()) 1997 1998 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1999 break 2000 else: 2001 self._match(TokenType.WITH) 2002 2003 return self.expression( 2004 exp.With, comments=comments, expressions=expressions, recursive=recursive 2005 ) 2006 2007 def _parse_cte(self) -> exp.CTE: 2008 alias = self._parse_table_alias() 2009 if not alias or not alias.this: 2010 self.raise_error("Expected CTE to have alias") 2011 2012 self._match(TokenType.ALIAS) 2013 return self.expression( 2014 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2015 ) 2016 2017 def _parse_table_alias( 2018 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2019 ) -> t.Optional[exp.TableAlias]: 2020 any_token = self._match(TokenType.ALIAS) 2021 alias = ( 2022 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2023 or self._parse_string_as_identifier() 2024 
) 2025 2026 index = self._index 2027 if self._match(TokenType.L_PAREN): 2028 columns = self._parse_csv(self._parse_function_parameter) 2029 self._match_r_paren() if columns else self._retreat(index) 2030 else: 2031 columns = None 2032 2033 if not alias and not columns: 2034 return None 2035 2036 return self.expression(exp.TableAlias, this=alias, columns=columns) 2037 2038 def _parse_subquery( 2039 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2040 ) -> t.Optional[exp.Subquery]: 2041 if not this: 2042 return None 2043 2044 return self.expression( 2045 exp.Subquery, 2046 this=this, 2047 pivots=self._parse_pivots(), 2048 alias=self._parse_table_alias() if parse_alias else None, 2049 ) 2050 2051 def _parse_query_modifiers( 2052 self, this: t.Optional[exp.Expression] 2053 ) -> t.Optional[exp.Expression]: 2054 if isinstance(this, self.MODIFIABLES): 2055 for join in iter(self._parse_join, None): 2056 this.append("joins", join) 2057 for lateral in iter(self._parse_lateral, None): 2058 this.append("laterals", lateral) 2059 2060 while True: 2061 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2062 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2063 key, expression = parser(self) 2064 2065 if expression: 2066 this.set(key, expression) 2067 if key == "limit": 2068 offset = expression.args.pop("offset", None) 2069 if offset: 2070 this.set("offset", exp.Offset(expression=offset)) 2071 continue 2072 break 2073 return this 2074 2075 def _parse_hint(self) -> t.Optional[exp.Hint]: 2076 if self._match(TokenType.HINT): 2077 hints = [] 2078 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2079 hints.extend(hint) 2080 2081 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2082 self.raise_error("Expected */ after HINT") 2083 2084 return self.expression(exp.Hint, expressions=hints) 2085 2086 return None 2087 2088 def _parse_into(self) -> t.Optional[exp.Into]: 2089 if not self._match(TokenType.INTO): 2090 return 
None 2091 2092 temp = self._match(TokenType.TEMPORARY) 2093 unlogged = self._match_text_seq("UNLOGGED") 2094 self._match(TokenType.TABLE) 2095 2096 return self.expression( 2097 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2098 ) 2099 2100 def _parse_from( 2101 self, joins: bool = False, skip_from_token: bool = False 2102 ) -> t.Optional[exp.From]: 2103 if not skip_from_token and not self._match(TokenType.FROM): 2104 return None 2105 2106 return self.expression( 2107 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2108 ) 2109 2110 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2111 if not self._match(TokenType.MATCH_RECOGNIZE): 2112 return None 2113 2114 self._match_l_paren() 2115 2116 partition = self._parse_partition_by() 2117 order = self._parse_order() 2118 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2119 2120 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2121 rows = exp.var("ONE ROW PER MATCH") 2122 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2123 text = "ALL ROWS PER MATCH" 2124 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2125 text += f" SHOW EMPTY MATCHES" 2126 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2127 text += f" OMIT EMPTY MATCHES" 2128 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2129 text += f" WITH UNMATCHED ROWS" 2130 rows = exp.var(text) 2131 else: 2132 rows = None 2133 2134 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2135 text = "AFTER MATCH SKIP" 2136 if self._match_text_seq("PAST", "LAST", "ROW"): 2137 text += f" PAST LAST ROW" 2138 elif self._match_text_seq("TO", "NEXT", "ROW"): 2139 text += f" TO NEXT ROW" 2140 elif self._match_text_seq("TO", "FIRST"): 2141 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2142 elif self._match_text_seq("TO", "LAST"): 2143 text += f" TO LAST {self._advance_any().text}" # type: ignore 2144 after = exp.var(text) 
2145 else: 2146 after = None 2147 2148 if self._match_text_seq("PATTERN"): 2149 self._match_l_paren() 2150 2151 if not self._curr: 2152 self.raise_error("Expecting )", self._curr) 2153 2154 paren = 1 2155 start = self._curr 2156 2157 while self._curr and paren > 0: 2158 if self._curr.token_type == TokenType.L_PAREN: 2159 paren += 1 2160 if self._curr.token_type == TokenType.R_PAREN: 2161 paren -= 1 2162 2163 end = self._prev 2164 self._advance() 2165 2166 if paren > 0: 2167 self.raise_error("Expecting )", self._curr) 2168 2169 pattern = exp.var(self._find_sql(start, end)) 2170 else: 2171 pattern = None 2172 2173 define = ( 2174 self._parse_csv( 2175 lambda: self.expression( 2176 exp.Alias, 2177 alias=self._parse_id_var(any_token=True), 2178 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2179 ) 2180 ) 2181 if self._match_text_seq("DEFINE") 2182 else None 2183 ) 2184 2185 self._match_r_paren() 2186 2187 return self.expression( 2188 exp.MatchRecognize, 2189 partition_by=partition, 2190 order=order, 2191 measures=measures, 2192 rows=rows, 2193 after=after, 2194 pattern=pattern, 2195 define=define, 2196 alias=self._parse_table_alias(), 2197 ) 2198 2199 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2200 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2201 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2202 2203 if outer_apply or cross_apply: 2204 this = self._parse_select(table=True) 2205 view = None 2206 outer = not cross_apply 2207 elif self._match(TokenType.LATERAL): 2208 this = self._parse_select(table=True) 2209 view = self._match(TokenType.VIEW) 2210 outer = self._match(TokenType.OUTER) 2211 else: 2212 return None 2213 2214 if not this: 2215 this = self._parse_function() or self._parse_id_var(any_token=False) 2216 while self._match(TokenType.DOT): 2217 this = exp.Dot( 2218 this=this, 2219 expression=self._parse_function() or self._parse_id_var(any_token=False), 2220 ) 2221 2222 if view: 2223 table = 
self._parse_id_var(any_token=False) 2224 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2225 table_alias: t.Optional[exp.TableAlias] = self.expression( 2226 exp.TableAlias, this=table, columns=columns 2227 ) 2228 elif isinstance(this, exp.Subquery) and this.alias: 2229 # Ensures parity between the Subquery's and the Lateral's "alias" args 2230 table_alias = this.args["alias"].copy() 2231 else: 2232 table_alias = self._parse_table_alias() 2233 2234 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2235 2236 def _parse_join_parts( 2237 self, 2238 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2239 return ( 2240 self._match_set(self.JOIN_METHODS) and self._prev, 2241 self._match_set(self.JOIN_SIDES) and self._prev, 2242 self._match_set(self.JOIN_KINDS) and self._prev, 2243 ) 2244 2245 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2246 if self._match(TokenType.COMMA): 2247 return self.expression(exp.Join, this=self._parse_table()) 2248 2249 index = self._index 2250 method, side, kind = self._parse_join_parts() 2251 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2252 join = self._match(TokenType.JOIN) 2253 2254 if not skip_join_token and not join: 2255 self._retreat(index) 2256 kind = None 2257 method = None 2258 side = None 2259 2260 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2261 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2262 2263 if not skip_join_token and not join and not outer_apply and not cross_apply: 2264 return None 2265 2266 if outer_apply: 2267 side = Token(TokenType.LEFT, "LEFT") 2268 2269 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2270 2271 if method: 2272 kwargs["method"] = method.text 2273 if side: 2274 kwargs["side"] = side.text 2275 if kind: 2276 kwargs["kind"] = kind.text 2277 if hint: 2278 kwargs["hint"] = hint 2279 2280 
if self._match(TokenType.ON): 2281 kwargs["on"] = self._parse_conjunction() 2282 elif self._match(TokenType.USING): 2283 kwargs["using"] = self._parse_wrapped_id_vars() 2284 elif not (kind and kind.token_type == TokenType.CROSS): 2285 index = self._index 2286 joins = self._parse_joins() 2287 2288 if joins and self._match(TokenType.ON): 2289 kwargs["on"] = self._parse_conjunction() 2290 elif joins and self._match(TokenType.USING): 2291 kwargs["using"] = self._parse_wrapped_id_vars() 2292 else: 2293 joins = None 2294 self._retreat(index) 2295 kwargs["this"].set("joins", joins) 2296 2297 return self.expression(exp.Join, **kwargs) 2298 2299 def _parse_index( 2300 self, 2301 index: t.Optional[exp.Expression] = None, 2302 ) -> t.Optional[exp.Index]: 2303 if index: 2304 unique = None 2305 primary = None 2306 amp = None 2307 2308 self._match(TokenType.ON) 2309 self._match(TokenType.TABLE) # hive 2310 table = self._parse_table_parts(schema=True) 2311 else: 2312 unique = self._match(TokenType.UNIQUE) 2313 primary = self._match_text_seq("PRIMARY") 2314 amp = self._match_text_seq("AMP") 2315 2316 if not self._match(TokenType.INDEX): 2317 return None 2318 2319 index = self._parse_id_var() 2320 table = None 2321 2322 using = self._parse_field() if self._match(TokenType.USING) else None 2323 2324 if self._match(TokenType.L_PAREN, advance=False): 2325 columns = self._parse_wrapped_csv(self._parse_ordered) 2326 else: 2327 columns = None 2328 2329 return self.expression( 2330 exp.Index, 2331 this=index, 2332 table=table, 2333 using=using, 2334 columns=columns, 2335 unique=unique, 2336 primary=primary, 2337 amp=amp, 2338 partition_by=self._parse_partition_by(), 2339 ) 2340 2341 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2342 hints: t.List[exp.Expression] = [] 2343 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2344 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2345 hints.append( 2346 
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    # e.g. USE INDEX ... FOR JOIN / ORDER BY / GROUP BY
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (identifier, string,
        placeholder, or — outside schema context — a function call)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name (catalog.db.table) into a Table.

        Raises:
            ParseError: via `raise_error` when no table name can be parsed.
        """
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous db becomes catalog, table becomes db.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, derived VALUES, subquery, or a
        plain (possibly aliased/sampled/hinted) table reference."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
if unnest: 2411 return unnest 2412 2413 values = self._parse_derived_table_values() 2414 if values: 2415 return values 2416 2417 subquery = self._parse_select(table=True) 2418 if subquery: 2419 if not subquery.args.get("pivots"): 2420 subquery.set("pivots", self._parse_pivots()) 2421 return subquery 2422 2423 this: exp.Expression = self._parse_table_parts(schema=schema) 2424 2425 if schema: 2426 return self._parse_schema(this=this) 2427 2428 if self.ALIAS_POST_TABLESAMPLE: 2429 table_sample = self._parse_table_sample() 2430 2431 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2432 if alias: 2433 this.set("alias", alias) 2434 2435 if not this.args.get("pivots"): 2436 this.set("pivots", self._parse_pivots()) 2437 2438 this.set("hints", self._parse_table_hints()) 2439 2440 if not self.ALIAS_POST_TABLESAMPLE: 2441 table_sample = self._parse_table_sample() 2442 2443 if table_sample: 2444 table_sample.set("this", this) 2445 this = table_sample 2446 2447 if joins: 2448 for join in iter(self._parse_join, None): 2449 this.append("joins", join) 2450 2451 return this 2452 2453 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2454 if not self._match(TokenType.UNNEST): 2455 return None 2456 2457 expressions = self._parse_wrapped_csv(self._parse_type) 2458 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2459 2460 alias = self._parse_table_alias() if with_alias else None 2461 2462 if alias and self.UNNEST_COLUMN_ONLY: 2463 if alias.args.get("columns"): 2464 self.raise_error("Unexpected extra column alias in unnest.") 2465 2466 alias.set("columns", [alias.this]) 2467 alias.set("this", None) 2468 2469 offset = None 2470 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2471 self._match(TokenType.ALIAS) 2472 offset = self._parse_id_var() or exp.to_identifier("offset") 2473 2474 return self.expression( 2475 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 
2476 ) 2477 2478 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2479 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2480 if not is_derived and not self._match(TokenType.VALUES): 2481 return None 2482 2483 expressions = self._parse_csv(self._parse_value) 2484 alias = self._parse_table_alias() 2485 2486 if is_derived: 2487 self._match_r_paren() 2488 2489 return self.expression( 2490 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2491 ) 2492 2493 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2494 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2495 as_modifier and self._match_text_seq("USING", "SAMPLE") 2496 ): 2497 return None 2498 2499 bucket_numerator = None 2500 bucket_denominator = None 2501 bucket_field = None 2502 percent = None 2503 rows = None 2504 size = None 2505 seed = None 2506 2507 kind = ( 2508 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2509 ) 2510 method = self._parse_var(tokens=(TokenType.ROW,)) 2511 2512 self._match(TokenType.L_PAREN) 2513 2514 num = self._parse_number() 2515 2516 if self._match_text_seq("BUCKET"): 2517 bucket_numerator = self._parse_number() 2518 self._match_text_seq("OUT", "OF") 2519 bucket_denominator = bucket_denominator = self._parse_number() 2520 self._match(TokenType.ON) 2521 bucket_field = self._parse_field() 2522 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2523 percent = num 2524 elif self._match(TokenType.ROWS): 2525 rows = num 2526 else: 2527 size = num 2528 2529 self._match(TokenType.R_PAREN) 2530 2531 if self._match(TokenType.L_PAREN): 2532 method = self._parse_var() 2533 seed = self._match(TokenType.COMMA) and self._parse_number() 2534 self._match_r_paren() 2535 elif self._match_texts(("SEED", "REPEATABLE")): 2536 seed = self._parse_wrapped(self._parse_number) 2537 2538 return self.expression( 2539 exp.TableSample, 2540 method=method, 2541 
bucket_numerator=bucket_numerator, 2542 bucket_denominator=bucket_denominator, 2543 bucket_field=bucket_field, 2544 percent=percent, 2545 rows=rows, 2546 size=size, 2547 seed=seed, 2548 kind=kind, 2549 ) 2550 2551 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2552 return list(iter(self._parse_pivot, None)) or None 2553 2554 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2555 return list(iter(self._parse_join, None)) or None 2556 2557 # https://duckdb.org/docs/sql/statements/pivot 2558 def _parse_simplified_pivot(self) -> exp.Pivot: 2559 def _parse_on() -> t.Optional[exp.Expression]: 2560 this = self._parse_bitwise() 2561 return self._parse_in(this) if self._match(TokenType.IN) else this 2562 2563 this = self._parse_table() 2564 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2565 using = self._match(TokenType.USING) and self._parse_csv( 2566 lambda: self._parse_alias(self._parse_function()) 2567 ) 2568 group = self._parse_group() 2569 return self.expression( 2570 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2571 ) 2572 2573 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2574 index = self._index 2575 2576 if self._match(TokenType.PIVOT): 2577 unpivot = False 2578 elif self._match(TokenType.UNPIVOT): 2579 unpivot = True 2580 else: 2581 return None 2582 2583 expressions = [] 2584 field = None 2585 2586 if not self._match(TokenType.L_PAREN): 2587 self._retreat(index) 2588 return None 2589 2590 if unpivot: 2591 expressions = self._parse_csv(self._parse_column) 2592 else: 2593 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2594 2595 if not expressions: 2596 self.raise_error("Failed to parse PIVOT's aggregation list") 2597 2598 if not self._match(TokenType.FOR): 2599 self.raise_error("Expecting FOR") 2600 2601 value = self._parse_column() 2602 2603 if not self._match(TokenType.IN): 2604 self.raise_error("Expecting IN") 2605 2606 field = self._parse_in(value, 
alias=True) 2607 2608 self._match_r_paren() 2609 2610 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2611 2612 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2613 pivot.set("alias", self._parse_table_alias()) 2614 2615 if not unpivot: 2616 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2617 2618 columns: t.List[exp.Expression] = [] 2619 for fld in pivot.args["field"].expressions: 2620 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2621 for name in names: 2622 if self.PREFIXED_PIVOT_COLUMNS: 2623 name = f"{name}_{field_name}" if name else field_name 2624 else: 2625 name = f"{field_name}_{name}" if name else field_name 2626 2627 columns.append(exp.to_identifier(name)) 2628 2629 pivot.set("columns", columns) 2630 2631 return pivot 2632 2633 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2634 return [agg.alias for agg in aggregations] 2635 2636 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2637 if not skip_where_token and not self._match(TokenType.WHERE): 2638 return None 2639 2640 return self.expression( 2641 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2642 ) 2643 2644 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2645 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2646 return None 2647 2648 elements = defaultdict(list) 2649 2650 if self._match(TokenType.ALL): 2651 return self.expression(exp.Group, all=True) 2652 2653 while True: 2654 expressions = self._parse_csv(self._parse_conjunction) 2655 if expressions: 2656 elements["expressions"].extend(expressions) 2657 2658 grouping_sets = self._parse_grouping_sets() 2659 if grouping_sets: 2660 elements["grouping_sets"].extend(grouping_sets) 2661 2662 rollup = None 2663 cube = None 2664 totals = None 2665 2666 with_ = 
self._match(TokenType.WITH) 2667 if self._match(TokenType.ROLLUP): 2668 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2669 elements["rollup"].extend(ensure_list(rollup)) 2670 2671 if self._match(TokenType.CUBE): 2672 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2673 elements["cube"].extend(ensure_list(cube)) 2674 2675 if self._match_text_seq("TOTALS"): 2676 totals = True 2677 elements["totals"] = True # type: ignore 2678 2679 if not (grouping_sets or rollup or cube or totals): 2680 break 2681 2682 return self.expression(exp.Group, **elements) # type: ignore 2683 2684 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2685 if not self._match(TokenType.GROUPING_SETS): 2686 return None 2687 2688 return self._parse_wrapped_csv(self._parse_grouping_set) 2689 2690 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2691 if self._match(TokenType.L_PAREN): 2692 grouping_set = self._parse_csv(self._parse_column) 2693 self._match_r_paren() 2694 return self.expression(exp.Tuple, expressions=grouping_set) 2695 2696 return self._parse_column() 2697 2698 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2699 if not skip_having_token and not self._match(TokenType.HAVING): 2700 return None 2701 return self.expression(exp.Having, this=self._parse_conjunction()) 2702 2703 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2704 if not self._match(TokenType.QUALIFY): 2705 return None 2706 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2707 2708 def _parse_order( 2709 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2710 ) -> t.Optional[exp.Expression]: 2711 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2712 return this 2713 2714 return self.expression( 2715 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2716 ) 2717 2718 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> 
t.Optional[E]: 2719 if not self._match(token): 2720 return None 2721 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2722 2723 def _parse_ordered(self) -> exp.Ordered: 2724 this = self._parse_conjunction() 2725 self._match(TokenType.ASC) 2726 2727 is_desc = self._match(TokenType.DESC) 2728 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2729 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2730 desc = is_desc or False 2731 asc = not desc 2732 nulls_first = is_nulls_first or False 2733 explicitly_null_ordered = is_nulls_first or is_nulls_last 2734 2735 if ( 2736 not explicitly_null_ordered 2737 and ( 2738 (asc and self.NULL_ORDERING == "nulls_are_small") 2739 or (desc and self.NULL_ORDERING != "nulls_are_small") 2740 ) 2741 and self.NULL_ORDERING != "nulls_are_last" 2742 ): 2743 nulls_first = True 2744 2745 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2746 2747 def _parse_limit( 2748 self, this: t.Optional[exp.Expression] = None, top: bool = False 2749 ) -> t.Optional[exp.Expression]: 2750 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2751 limit_paren = self._match(TokenType.L_PAREN) 2752 expression = self._parse_number() if top else self._parse_term() 2753 2754 if self._match(TokenType.COMMA): 2755 offset = expression 2756 expression = self._parse_term() 2757 else: 2758 offset = None 2759 2760 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2761 2762 if limit_paren: 2763 self._match_r_paren() 2764 2765 return limit_exp 2766 2767 if self._match(TokenType.FETCH): 2768 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2769 direction = self._prev.text if direction else "FIRST" 2770 2771 count = self._parse_number() 2772 percent = self._match(TokenType.PERCENT) 2773 2774 self._match_set((TokenType.ROW, TokenType.ROWS)) 2775 2776 only = self._match_text_seq("ONLY") 2777 with_ties = self._match_text_seq("WITH", "TIES") 
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if OFFSET is absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        # Optional ROW / ROWS noise word, e.g. "OFFSET 5 ROWS".
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE),
        each optionally followed by OF <tables> and NOWAIT / WAIT <n> / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or the WAIT <n> expression.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a chain of UNION / EXCEPT / INTERSECT operators starting from `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default set-op semantics; an explicit ALL disables it.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            # Recurse so "a UNION b UNION c" nests right-associatively.
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including a trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS NULL, ...) around a bitwise expr."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not actually an IS predicate — back out to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized list/subquery, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause when present (used after LIKE-style operators)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<n>' <unit> where possible."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, including << and >> spelled as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse a value that may be a typed literal / cast (e.g. DATE '2020-01-01') or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Dialect-specific handling for <TYPE> '<literal>' forms.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else — treat it as a column instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type-size argument, e.g. the 10 in VARCHAR(10), with an optional trailing var."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, backtracking to `index` whenever the tokens turn out not to be one.

        When `check_func` is true, a type name followed by a string is rejected so that
        function-like uses (e.g. a function call with the same name) are not misparsed.
        """
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call — remember to double-check below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # TYPE[] array syntax, possibly multi-dimensional (TYPE[][]...).
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" after a type name means this wasn't a type after all.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting, e.g. ARRAY<INT> or STRUCT<a INT, b TEXT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # TYPE(...) not followed by a string: assume it's a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: an optionally-typed name, with an optional ':' separator."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted / bracketed) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::casts, JSON extraction, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the name parts: what looked like a column is actually a table/db qualifier.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, .N number, or a parenthesized expr/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. ".5" -> 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable (in that order)."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: Name -> builder overrides; defaults to `self.FUNCTIONS`.
            anonymous: When true, always build an exp.Anonymous node instead of a known function.
            optional_parens: Allow paren-less functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip over the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as <kind>.<name>."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (x -> ..., (x, y) -> ...), DISTINCT args, or a plain argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list attached to `this` (e.g. CREATE TABLE)."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT; if it parses, this isn't a schema.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and any column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) / AS (expression)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse constraints introduced by NOT: NOT NULL and NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; named ones collect all following constraint bodies."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no CONSTRAINT <name> prefix."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON <event> <action>, DEFERRABLE, etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event word (e.g. DELETE / UPDATE) — any token is accepted here.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is false, assume REFERENCES was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (...) REFERENCES ... with ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or, with a column list, a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Open-start slice, e.g. x[:n].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscripting: normalize indices for the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Chained subscripts, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END (CASE itself already consumed)."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call IF(...) or statement form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>), also tolerating a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING {MAX | MIN} column])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the argument list of CAST/TRY_CAST: <expr> AS <type> [FORMAT <fmt>]."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS <temporal> FORMAT ...) becomes STR_TO_DATE / STR_TO_TIME
                # with the format translated into the dialect-neutral time mapping.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, coalescing NULLs to '' for dialects where NULL concat is ''."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / LISTAGG variants into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: <expr> USING <charset> or <expr>, <type>."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired value is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Unknown search value: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one JSON_OBJECT entry: [KEY] <key> {:| VALUE} <value>."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)
self._match_text_seq("WITHOUT", "UNIQUE"): 3913 unique_keys = False 3914 3915 self._match_text_seq("KEYS") 3916 3917 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3918 format_json = self._match_text_seq("FORMAT", "JSON") 3919 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3920 3921 return self.expression( 3922 exp.JSONObject, 3923 expressions=expressions, 3924 null_handling=null_handling, 3925 unique_keys=unique_keys, 3926 return_type=return_type, 3927 format_json=format_json, 3928 encoding=encoding, 3929 ) 3930 3931 def _parse_logarithm(self) -> exp.Func: 3932 # Default argument order is base, expression 3933 args = self._parse_csv(self._parse_range) 3934 3935 if len(args) > 1: 3936 if not self.LOG_BASE_FIRST: 3937 args.reverse() 3938 return exp.Log.from_arg_list(args) 3939 3940 return self.expression( 3941 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3942 ) 3943 3944 def _parse_match_against(self) -> exp.MatchAgainst: 3945 expressions = self._parse_csv(self._parse_column) 3946 3947 self._match_text_seq(")", "AGAINST", "(") 3948 3949 this = self._parse_string() 3950 3951 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3952 modifier = "IN NATURAL LANGUAGE MODE" 3953 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3954 modifier = f"{modifier} WITH QUERY EXPANSION" 3955 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3956 modifier = "IN BOOLEAN MODE" 3957 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3958 modifier = "WITH QUERY EXPANSION" 3959 else: 3960 modifier = None 3961 3962 return self.expression( 3963 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3964 ) 3965 3966 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3967 def _parse_open_json(self) -> exp.OpenJSON: 3968 this = self._parse_bitwise() 3969 path = self._match(TokenType.COMMA) and self._parse_string() 3970 3971 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3972 this = self._parse_field(any_token=True) 3973 kind = self._parse_types() 3974 path = self._parse_string() 3975 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3976 3977 return self.expression( 3978 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3979 ) 3980 3981 expressions = None 3982 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3983 self._match_l_paren() 3984 expressions = self._parse_csv(_parse_open_json_column_def) 3985 3986 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3987 3988 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3989 args = self._parse_csv(self._parse_bitwise) 3990 3991 if self._match(TokenType.IN): 3992 return self.expression( 3993 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3994 ) 3995 3996 if haystack_first: 3997 haystack = seq_get(args, 0) 3998 needle = seq_get(args, 1) 3999 else: 4000 needle = seq_get(args, 0) 4001 haystack = seq_get(args, 1) 4002 4003 return self.expression( 4004 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4005 ) 4006 4007 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4008 args = self._parse_csv(self._parse_table) 4009 return exp.JoinHint(this=func_name.upper(), expressions=args) 4010 4011 def _parse_substring(self) -> exp.Substring: 4012 # Postgres supports the form: substring(string [from int] [for int]) 4013 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4014 4015 args = self._parse_csv(self._parse_bitwise) 4016 4017 if self._match(TokenType.FROM): 4018 args.append(self._parse_bitwise()) 4019 if self._match(TokenType.FOR): 4020 args.append(self._parse_bitwise()) 4021 4022 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4023 4024 def _parse_trim(self) -> exp.Trim: 4025 # https://www.w3resource.com/sql/character-functions/trim.php 4026 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4027 4028 position = None 4029 collation = None 4030 4031 if self._match_texts(self.TRIM_TYPES): 4032 position = self._prev.text.upper() 4033 4034 expression = self._parse_bitwise() 4035 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4036 this = self._parse_bitwise() 4037 else: 4038 this = expression 4039 expression = None 4040 4041 if self._match(TokenType.COLLATE): 4042 collation = self._parse_bitwise() 4043 4044 return self.expression( 4045 exp.Trim, this=this, position=position, expression=expression, collation=collation 4046 ) 4047 4048 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4049 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4050 4051 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4052 return self._parse_window(self._parse_id_var(), alias=True) 4053 4054 def _parse_respect_or_ignore_nulls( 4055 self, this: t.Optional[exp.Expression] 4056 ) -> t.Optional[exp.Expression]: 4057 if self._match_text_seq("IGNORE", "NULLS"): 4058 return self.expression(exp.IgnoreNulls, this=this) 4059 if self._match_text_seq("RESPECT", "NULLS"): 4060 return self.expression(exp.RespectNulls, this=this) 4061 return this 4062 4063 def _parse_window( 4064 self, this: t.Optional[exp.Expression], alias: bool = False 4065 ) -> t.Optional[exp.Expression]: 4066 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4067 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4068 self._match_r_paren() 4069 4070 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4071 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4072 if self._match_text_seq("WITHIN", "GROUP"): 4073 order = self._parse_wrapped(self._parse_order) 4074 this = self.expression(exp.WithinGroup, this=this, expression=order) 4075 4076 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4077 # Some dialects choose to implement and some do not. 4078 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4079 4080 # There is some code above in _parse_lambda that handles 4081 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4082 4083 # The below changes handle 4084 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4085 4086 # Oracle allows both formats 4087 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4088 # and Snowflake chose to do the same for familiarity 4089 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4090 this = self._parse_respect_or_ignore_nulls(this) 4091 4092 # bigquery select from window x AS (partition by ...) 
4093 if alias: 4094 over = None 4095 self._match(TokenType.ALIAS) 4096 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4097 return this 4098 else: 4099 over = self._prev.text.upper() 4100 4101 if not self._match(TokenType.L_PAREN): 4102 return self.expression( 4103 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4104 ) 4105 4106 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4107 4108 first = self._match(TokenType.FIRST) 4109 if self._match_text_seq("LAST"): 4110 first = False 4111 4112 partition = self._parse_partition_by() 4113 order = self._parse_order() 4114 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4115 4116 if kind: 4117 self._match(TokenType.BETWEEN) 4118 start = self._parse_window_spec() 4119 self._match(TokenType.AND) 4120 end = self._parse_window_spec() 4121 4122 spec = self.expression( 4123 exp.WindowSpec, 4124 kind=kind, 4125 start=start["value"], 4126 start_side=start["side"], 4127 end=end["value"], 4128 end_side=end["side"], 4129 ) 4130 else: 4131 spec = None 4132 4133 self._match_r_paren() 4134 4135 return self.expression( 4136 exp.Window, 4137 this=this, 4138 partition_by=partition, 4139 order=order, 4140 spec=spec, 4141 alias=window_alias, 4142 over=over, 4143 first=first, 4144 ) 4145 4146 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4147 self._match(TokenType.BETWEEN) 4148 4149 return { 4150 "value": ( 4151 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4152 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4153 or self._parse_bitwise() 4154 ), 4155 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4156 } 4157 4158 def _parse_alias( 4159 self, this: t.Optional[exp.Expression], explicit: bool = False 4160 ) -> t.Optional[exp.Expression]: 4161 any_token = self._match(TokenType.ALIAS) 4162 4163 if explicit and not any_token: 4164 return this 4165 4166 if 
self._match(TokenType.L_PAREN): 4167 aliases = self.expression( 4168 exp.Aliases, 4169 this=this, 4170 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4171 ) 4172 self._match_r_paren(aliases) 4173 return aliases 4174 4175 alias = self._parse_id_var(any_token) 4176 4177 if alias: 4178 return self.expression(exp.Alias, this=this, alias=alias) 4179 4180 return this 4181 4182 def _parse_id_var( 4183 self, 4184 any_token: bool = True, 4185 tokens: t.Optional[t.Collection[TokenType]] = None, 4186 ) -> t.Optional[exp.Expression]: 4187 identifier = self._parse_identifier() 4188 4189 if identifier: 4190 return identifier 4191 4192 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4193 quoted = self._prev.token_type == TokenType.STRING 4194 return exp.Identifier(this=self._prev.text, quoted=quoted) 4195 4196 return None 4197 4198 def _parse_string(self) -> t.Optional[exp.Expression]: 4199 if self._match(TokenType.STRING): 4200 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4201 return self._parse_placeholder() 4202 4203 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4204 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4205 4206 def _parse_number(self) -> t.Optional[exp.Expression]: 4207 if self._match(TokenType.NUMBER): 4208 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4209 return self._parse_placeholder() 4210 4211 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4212 if self._match(TokenType.IDENTIFIER): 4213 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4214 return self._parse_placeholder() 4215 4216 def _parse_var( 4217 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4218 ) -> t.Optional[exp.Expression]: 4219 if ( 4220 (any_token and self._advance_any()) 4221 or self._match(TokenType.VAR) 4222 or (self._match_set(tokens) if tokens else False) 
4223 ): 4224 return self.expression(exp.Var, this=self._prev.text) 4225 return self._parse_placeholder() 4226 4227 def _advance_any(self) -> t.Optional[Token]: 4228 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4229 self._advance() 4230 return self._prev 4231 return None 4232 4233 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4234 return self._parse_var() or self._parse_string() 4235 4236 def _parse_null(self) -> t.Optional[exp.Expression]: 4237 if self._match(TokenType.NULL): 4238 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4239 return None 4240 4241 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4242 if self._match(TokenType.TRUE): 4243 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4244 if self._match(TokenType.FALSE): 4245 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4246 return None 4247 4248 def _parse_star(self) -> t.Optional[exp.Expression]: 4249 if self._match(TokenType.STAR): 4250 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4251 return None 4252 4253 def _parse_parameter(self) -> exp.Parameter: 4254 wrapped = self._match(TokenType.L_BRACE) 4255 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4256 self._match(TokenType.R_BRACE) 4257 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4258 4259 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4260 if self._match_set(self.PLACEHOLDER_PARSERS): 4261 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4262 if placeholder: 4263 return placeholder 4264 self._advance(-1) 4265 return None 4266 4267 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4268 if not self._match(TokenType.EXCEPT): 4269 return None 4270 if self._match(TokenType.L_PAREN, advance=False): 4271 return self._parse_wrapped_csv(self._parse_column) 4272 return self._parse_csv(self._parse_column) 4273 4274 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4275 if not self._match(TokenType.REPLACE): 4276 return None 4277 if self._match(TokenType.L_PAREN, advance=False): 4278 return self._parse_wrapped_csv(self._parse_expression) 4279 return self._parse_expressions() 4280 4281 def _parse_csv( 4282 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4283 ) -> t.List[t.Optional[exp.Expression]]: 4284 parse_result = parse_method() 4285 items = [parse_result] if parse_result is not None else [] 4286 4287 while self._match(sep): 4288 self._add_comments(parse_result) 4289 parse_result = parse_method() 4290 if parse_result is not None: 4291 items.append(parse_result) 4292 4293 return items 4294 4295 def _parse_tokens( 4296 self, parse_method: t.Callable, expressions: t.Dict 4297 ) -> t.Optional[exp.Expression]: 4298 this = parse_method() 4299 4300 while self._match_set(expressions): 4301 this = self.expression( 4302 expressions[self._prev.token_type], 4303 this=this, 4304 comments=self._prev_comments, 4305 expression=parse_method(), 4306 ) 4307 4308 return this 4309 4310 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4311 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4312 4313 def _parse_wrapped_csv( 4314 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4315 ) -> t.List[t.Optional[exp.Expression]]: 4316 return self._parse_wrapped( 4317 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4318 ) 4319 4320 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4321 wrapped = self._match(TokenType.L_PAREN) 4322 if not wrapped and not optional: 4323 self.raise_error("Expecting (") 4324 parse_result = parse_method() 4325 if wrapped: 4326 self._match_r_paren() 4327 return parse_result 4328 4329 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4330 return self._parse_csv(self._parse_expression) 4331 
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or fall back to a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may consist of several VAR tokens (e.g. READ ONLY).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] column definition."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE ... DROP [COLUMN] clause, defaulting the kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse an ALTER TABLE ... DROP PARTITION clause."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY / CHECK item."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... ADD actions: constraints, else column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint — rewind and re-parse as column additions.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] (DROP/SET DEFAULT, or type change)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP actions: partitions, else column drops."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop — rewind and re-parse as column drops.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement; unsupported forms fall back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse if every token was consumed; otherwise fall back.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE INTO ... USING ... ON ... WHEN ... statement."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None if neither given.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via a registered sub-parser, else a generic Show node."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment — rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via a registered sub-parser, else as an assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET/UNSET statement; leftover tokens demote it to a raw Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the given multi-word options and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement as an opaque Command, starting at `start`."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword from the remaining SQL text.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: NAME(kind(key value ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE(MIN x MAX y) property; MIN defaults to 0 if omitted."""
        # NOTE(review): `min`/`max` shadow the builtins within this method.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser keyed by the longest keyword sequence at the current position.

        Rewinds the token cursor if no key in `trie` matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match (and by default consume) the current token if it has `token_type`;
        # any pending comments are attached to `expression`.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Match (and by default consume) the current token if its type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match (and by default consume) the next two tokens if their types match.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a `(`, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a `)`, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match (and by default consume) the current token if its upper-cased text is in `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Match the exact sequence of upper-cased token texts; rewinds fully on any miss.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes into Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters inside `node` with plain
        identifiers (or Dot chains), so they aren't treated as table columns."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # If the column sits inside a Dot chain, replace the outermost Dot;
                # otherwise (loop's else) replace the column itself.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument yields a StarMap; otherwise the arguments are
    consumed pairwise as (key, value) and wrapped into a VarMap holding two
    parallel arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])  # odd-length input raises IndexError, as before
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: 
lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 
TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 
TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: 
self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, 
this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 742 TokenType.WHERE: lambda self: ("where", self._parse_where()), 743 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 744 TokenType.HAVING: 
lambda self: ("having", self._parse_having()), 745 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 746 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 747 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 748 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 749 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 750 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 751 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 752 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 753 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.CLUSTER_BY: lambda self: ( 756 "cluster", 757 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 ), 759 TokenType.DISTRIBUTE_BY: lambda self: ( 760 "distribute", 761 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 762 ), 763 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 764 } 765 766 SET_PARSERS = { 767 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 768 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 769 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 770 "TRANSACTION": lambda self: self._parse_set_transaction(), 771 } 772 773 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 774 775 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 776 777 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 778 779 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 780 781 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 782 783 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 784 TRANSACTION_CHARACTERISTICS = { 785 "ISOLATION LEVEL REPEATABLE READ", 786 "ISOLATION LEVEL READ COMMITTED", 787 "ISOLATION LEVEL READ 
UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled -- NOTE(review): presumably overridden per dialect by external
    # machinery; the values below are only fallbacks. Confirm against callers.
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Initialize parser settings; see the class docstring for their meaning."""
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens:
t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the target type, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split tokens into per-statement chunks on semicolons and run
        `parse_method` over each chunk, collecting one tree per statement."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon must not open a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise absorb comments buffered from the tokens.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfer any comments buffered from the previous token onto `expression`.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by the two tokens (inclusive)."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor and refresh the curr/next/prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewind (or fast-forward) the cursor to an absolute token index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fallback: wrap the previous token plus the rest of the statement as a raw Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT [IF EXISTS] ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to treating it as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse the target table of a TO clause into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty,
            this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into exp.MergeTreeTTL."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Top-level statement dispatch: registered statement parsers first,
        # then raw commands, then a plain expression / SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        # Parses DROP [TEMPORARY] [MATERIALIZED] <kind> ...; falls back to an
        # opaque command when the creatable kind is unrecognized.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; `not_` selects the NOT variant.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE / [OR] REPLACE statement into exp.Create."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # Snowflake-style CLONE with optional AT/BEFORE time-travel clause.
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses a single table/DDL property, trying registered parsers first.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (key is a VAR or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED AS <format> or STORED AS INPUTFORMAT ... OUTPUTFORMAT ...
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Parses `[= | AS] <field>` into the given property class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties; `before` switches to the
        # pre-name (Teradata-style) property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is a table property only when preceded by a PRE_VOLATILE
        # token (two tokens back); otherwise it's a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        # Dispatches the various WITH ... property forms.
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF [DEFAULT]; `on` stays None when neither matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered-cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back up and bail out.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # LOCKING <kind> [<target>] FOR|IN <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]; `statistics` stays None if absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING|EXCLUDING <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> or RETURNS TABLE [<...>] / TABLE (schema).
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        # Parses INSERT [OVERWRITE] [IGNORE] [OR <alt>] INTO <target> ... and
        # the Hive INSERT ... DIRECTORY variant.
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Handles both ON CONFLICT (Postgres) and ON DUPLICATE KEY (MySQL).
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED <options>.
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ... (Hive);
        # anything else falls back to an opaque command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # "from" is a Python keyword, hence the dict-splat construction.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS <select>] (Spark).
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parses a SELECT statement, a WITH-prefixed statement, a parenthesized
        subquery (when `nested`/`table`), or a VALUES clause.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery-style SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        # Parses a WITH [RECURSIVE] clause and its comma-separated CTEs.
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray WITH between them is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # <alias> AS (<statement>)
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # [AS] <name> [(col, ...)] — both parts are optional; returns None
        # only when neither is present.
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind if the parenthesized list turned out to be empty.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Attaches trailing clauses (joins, laterals, WHERE/GROUP/ORDER/LIMIT
        # etc. via QUERY_MODIFIER_PARSERS) onto a modifiable expression.
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT x, y / LIMIT x OFFSET y parse may carry the
                            # offset inside the limit node; hoist it onto `this`.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Parses an Oracle-style /*+ ... */ hint comment token stream.
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        # SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows
= exp.var("ONE ROW PER MATCH") 2123 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2124 text = "ALL ROWS PER MATCH" 2125 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2126 text += f" SHOW EMPTY MATCHES" 2127 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2128 text += f" OMIT EMPTY MATCHES" 2129 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2130 text += f" WITH UNMATCHED ROWS" 2131 rows = exp.var(text) 2132 else: 2133 rows = None 2134 2135 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2136 text = "AFTER MATCH SKIP" 2137 if self._match_text_seq("PAST", "LAST", "ROW"): 2138 text += f" PAST LAST ROW" 2139 elif self._match_text_seq("TO", "NEXT", "ROW"): 2140 text += f" TO NEXT ROW" 2141 elif self._match_text_seq("TO", "FIRST"): 2142 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2143 elif self._match_text_seq("TO", "LAST"): 2144 text += f" TO LAST {self._advance_any().text}" # type: ignore 2145 after = exp.var(text) 2146 else: 2147 after = None 2148 2149 if self._match_text_seq("PATTERN"): 2150 self._match_l_paren() 2151 2152 if not self._curr: 2153 self.raise_error("Expecting )", self._curr) 2154 2155 paren = 1 2156 start = self._curr 2157 2158 while self._curr and paren > 0: 2159 if self._curr.token_type == TokenType.L_PAREN: 2160 paren += 1 2161 if self._curr.token_type == TokenType.R_PAREN: 2162 paren -= 1 2163 2164 end = self._prev 2165 self._advance() 2166 2167 if paren > 0: 2168 self.raise_error("Expecting )", self._curr) 2169 2170 pattern = exp.var(self._find_sql(start, end)) 2171 else: 2172 pattern = None 2173 2174 define = ( 2175 self._parse_csv( 2176 lambda: self.expression( 2177 exp.Alias, 2178 alias=self._parse_id_var(any_token=True), 2179 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2180 ) 2181 ) 2182 if self._match_text_seq("DEFINE") 2183 else None 2184 ) 2185 2186 self._match_r_paren() 2187 2188 return self.expression( 2189 exp.MatchRecognize, 2190 partition_by=partition, 2191 
order=order, 2192 measures=measures, 2193 rows=rows, 2194 after=after, 2195 pattern=pattern, 2196 define=define, 2197 alias=self._parse_table_alias(), 2198 ) 2199 2200 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2201 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2202 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2203 2204 if outer_apply or cross_apply: 2205 this = self._parse_select(table=True) 2206 view = None 2207 outer = not cross_apply 2208 elif self._match(TokenType.LATERAL): 2209 this = self._parse_select(table=True) 2210 view = self._match(TokenType.VIEW) 2211 outer = self._match(TokenType.OUTER) 2212 else: 2213 return None 2214 2215 if not this: 2216 this = self._parse_function() or self._parse_id_var(any_token=False) 2217 while self._match(TokenType.DOT): 2218 this = exp.Dot( 2219 this=this, 2220 expression=self._parse_function() or self._parse_id_var(any_token=False), 2221 ) 2222 2223 if view: 2224 table = self._parse_id_var(any_token=False) 2225 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2226 table_alias: t.Optional[exp.TableAlias] = self.expression( 2227 exp.TableAlias, this=table, columns=columns 2228 ) 2229 elif isinstance(this, exp.Subquery) and this.alias: 2230 # Ensures parity between the Subquery's and the Lateral's "alias" args 2231 table_alias = this.args["alias"].copy() 2232 else: 2233 table_alias = self._parse_table_alias() 2234 2235 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2236 2237 def _parse_join_parts( 2238 self, 2239 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2240 return ( 2241 self._match_set(self.JOIN_METHODS) and self._prev, 2242 self._match_set(self.JOIN_SIDES) and self._prev, 2243 self._match_set(self.JOIN_KINDS) and self._prev, 2244 ) 2245 2246 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2247 if self._match(TokenType.COMMA): 2248 
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        If `index` is given, the caller already parsed the index name, and this
        parses the `ON [TABLE] <table>` tail; otherwise it parses the
        `[UNIQUE] [PRIMARY] [AMP] INDEX <name>` prefix and leaves table None.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            # Without the INDEX keyword this isn't an index expression at all.
            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        # Peek (advance=False) for a column list; only parse it if present.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints (T-SQL WITH (...) or MySQL index hints)."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function, id, string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )
-> exp.Table: 2378 catalog = None 2379 db = None 2380 table = self._parse_table_part(schema=schema) 2381 2382 while self._match(TokenType.DOT): 2383 if catalog: 2384 # This allows nesting the table in arbitrarily many dot expressions if needed 2385 table = self.expression( 2386 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2387 ) 2388 else: 2389 catalog = db 2390 db = table 2391 table = self._parse_table_part(schema=schema) 2392 2393 if not table: 2394 self.raise_error(f"Expected table name but got {self._curr}") 2395 2396 return self.expression( 2397 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2398 ) 2399 2400 def _parse_table( 2401 self, 2402 schema: bool = False, 2403 joins: bool = False, 2404 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2405 ) -> t.Optional[exp.Expression]: 2406 lateral = self._parse_lateral() 2407 if lateral: 2408 return lateral 2409 2410 unnest = self._parse_unnest() 2411 if unnest: 2412 return unnest 2413 2414 values = self._parse_derived_table_values() 2415 if values: 2416 return values 2417 2418 subquery = self._parse_select(table=True) 2419 if subquery: 2420 if not subquery.args.get("pivots"): 2421 subquery.set("pivots", self._parse_pivots()) 2422 return subquery 2423 2424 this: exp.Expression = self._parse_table_parts(schema=schema) 2425 2426 if schema: 2427 return self._parse_schema(this=this) 2428 2429 if self.ALIAS_POST_TABLESAMPLE: 2430 table_sample = self._parse_table_sample() 2431 2432 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2433 if alias: 2434 this.set("alias", alias) 2435 2436 if not this.args.get("pivots"): 2437 this.set("pivots", self._parse_pivots()) 2438 2439 this.set("hints", self._parse_table_hints()) 2440 2441 if not self.ALIAS_POST_TABLESAMPLE: 2442 table_sample = self._parse_table_sample() 2443 2444 if table_sample: 2445 table_sample.set("this", this) 2446 this = table_sample 2447 2448 if joins: 2449 for join 
in iter(self._parse_join, None): 2450 this.append("joins", join) 2451 2452 return this 2453 2454 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2455 if not self._match(TokenType.UNNEST): 2456 return None 2457 2458 expressions = self._parse_wrapped_csv(self._parse_type) 2459 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2460 2461 alias = self._parse_table_alias() if with_alias else None 2462 2463 if alias and self.UNNEST_COLUMN_ONLY: 2464 if alias.args.get("columns"): 2465 self.raise_error("Unexpected extra column alias in unnest.") 2466 2467 alias.set("columns", [alias.this]) 2468 alias.set("this", None) 2469 2470 offset = None 2471 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2472 self._match(TokenType.ALIAS) 2473 offset = self._parse_id_var() or exp.to_identifier("offset") 2474 2475 return self.expression( 2476 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2477 ) 2478 2479 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2480 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2481 if not is_derived and not self._match(TokenType.VALUES): 2482 return None 2483 2484 expressions = self._parse_csv(self._parse_value) 2485 alias = self._parse_table_alias() 2486 2487 if is_derived: 2488 self._match_r_paren() 2489 2490 return self.expression( 2491 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2492 ) 2493 2494 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2495 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2496 as_modifier and self._match_text_seq("USING", "SAMPLE") 2497 ): 2498 return None 2499 2500 bucket_numerator = None 2501 bucket_denominator = None 2502 bucket_field = None 2503 percent = None 2504 rows = None 2505 size = None 2506 seed = None 2507 2508 kind = ( 2509 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2510 
) 2511 method = self._parse_var(tokens=(TokenType.ROW,)) 2512 2513 self._match(TokenType.L_PAREN) 2514 2515 num = self._parse_number() 2516 2517 if self._match_text_seq("BUCKET"): 2518 bucket_numerator = self._parse_number() 2519 self._match_text_seq("OUT", "OF") 2520 bucket_denominator = bucket_denominator = self._parse_number() 2521 self._match(TokenType.ON) 2522 bucket_field = self._parse_field() 2523 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2524 percent = num 2525 elif self._match(TokenType.ROWS): 2526 rows = num 2527 else: 2528 size = num 2529 2530 self._match(TokenType.R_PAREN) 2531 2532 if self._match(TokenType.L_PAREN): 2533 method = self._parse_var() 2534 seed = self._match(TokenType.COMMA) and self._parse_number() 2535 self._match_r_paren() 2536 elif self._match_texts(("SEED", "REPEATABLE")): 2537 seed = self._parse_wrapped(self._parse_number) 2538 2539 return self.expression( 2540 exp.TableSample, 2541 method=method, 2542 bucket_numerator=bucket_numerator, 2543 bucket_denominator=bucket_denominator, 2544 bucket_field=bucket_field, 2545 percent=percent, 2546 rows=rows, 2547 size=size, 2548 seed=seed, 2549 kind=kind, 2550 ) 2551 2552 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2553 return list(iter(self._parse_pivot, None)) or None 2554 2555 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2556 return list(iter(self._parse_join, None)) or None 2557 2558 # https://duckdb.org/docs/sql/statements/pivot 2559 def _parse_simplified_pivot(self) -> exp.Pivot: 2560 def _parse_on() -> t.Optional[exp.Expression]: 2561 this = self._parse_bitwise() 2562 return self._parse_in(this) if self._match(TokenType.IN) else this 2563 2564 this = self._parse_table() 2565 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2566 using = self._match(TokenType.USING) and self._parse_csv( 2567 lambda: self._parse_alias(self._parse_function()) 2568 ) 2569 group = self._parse_group() 2570 return self.expression( 2571 exp.Pivot, 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a single PIVOT/UNPIVOT clause, or return None (retreating) if absent."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # PIVOT not followed by "(" isn't a pivot clause — rewind fully.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute output column names from the IN-values x aggregation names,
            # honoring the dialect's naming flags.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default pivot column naming: the aggregations' aliases (dialects may override)."""
        return [agg.alias for agg in aggregations]
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, accumulating expressions, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS into one Group expression."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulate each kind of element across loop iterations.
        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # A single WITH may precede ROLLUP, CUBE or TOTALS; when it does,
            # the element is stored as True rather than a column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while structured grouping elements keep appearing.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS (...)`; None when the keyword isn't present."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when there is no ORDER BY."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for ORDER-BY-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST].

        When null ordering is implicit, nulls_first is derived from the
        dialect's NULL_ORDERING setting so output can be normalized.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is True), including MySQL's
        `LIMIT offset, count` form and the ANSI FETCH FIRST/NEXT syntax."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT a, b — the first value is actually the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `OFFSET n [ROW|ROWS]`; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operators, recursing so a chain
        nests right-to-left; `this` is returned untouched when none follow."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is explicitly given.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Top of the scalar-expression precedence ladder: conjunction + optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains over equality expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =/!= style operators over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse </>/<= style operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), plus the
        Postgres ISNULL/NOTNULL shorthands and IS predicates."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS was already consumed).

        Handles IS [NOT] DISTINCT FROM and IS [NOT] NULL/TRUE/FALSE; retreats
        to before IS and returns None when neither form matches.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized list or
        subquery, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subqueryable becomes `query`; anything else a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node when an ESCAPE clause follows (LIKE ... ESCAPE '\\')."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing toward the canonical
        `INTERVAL '<number>' <unit>` form so transpilation is uniform."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            # e.g. INTERVAL '5 day' — split the string into value and unit.
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators over terms, including << and >> spelled as
        two consecutive LT/GT tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, else fall through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a data type.

        Tries INTERVAL first, then a type followed by a column/literal
        (e.g. `DATE '2020-01-01'` becomes a Cast); backtracks to a plain
        column parse when the type reading doesn't pan out.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Dialect-specific literal handlers (e.g. typed date literals).
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no following literal — treat as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a sized type parameter, e.g. the `30` (plus modifier) in VARCHAR(30)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
self._match(TokenType.L_PAREN): 3048 if is_struct: 3049 expressions = self._parse_csv(self._parse_struct_types) 3050 elif nested: 3051 expressions = self._parse_csv( 3052 lambda: self._parse_types(check_func=check_func, schema=schema) 3053 ) 3054 elif type_token in self.ENUM_TYPE_TOKENS: 3055 expressions = self._parse_csv(self._parse_primary) 3056 else: 3057 expressions = self._parse_csv(self._parse_type_size) 3058 3059 if not expressions or not self._match(TokenType.R_PAREN): 3060 self._retreat(index) 3061 return None 3062 3063 maybe_func = True 3064 3065 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3066 this = exp.DataType( 3067 this=exp.DataType.Type.ARRAY, 3068 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3069 nested=True, 3070 ) 3071 3072 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3073 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3074 3075 return this 3076 3077 if self._match(TokenType.L_BRACKET): 3078 self._retreat(index) 3079 return None 3080 3081 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3082 if nested and self._match(TokenType.LT): 3083 if is_struct: 3084 expressions = self._parse_csv(self._parse_struct_types) 3085 else: 3086 expressions = self._parse_csv( 3087 lambda: self._parse_types(check_func=check_func, schema=schema) 3088 ) 3089 3090 if not self._match(TokenType.GT): 3091 self.raise_error("Expecting >") 3092 3093 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3094 values = self._parse_csv(self._parse_conjunction) 3095 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3096 3097 value: t.Optional[exp.Expression] = None 3098 if type_token in self.TIMESTAMPS: 3099 if self._match_text_seq("WITH", "TIME", "ZONE"): 3100 maybe_func = False 3101 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3102 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3103 maybe_func = 
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: a type or identifier, an optional ``:`` separator,
        then any column-def extras (type, constraints)."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)  # optional name:type separator (e.g. spark/hive structs)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an AtTimeZone node if ``AT TIME ZONE`` follows; otherwise
        return `this` unchanged."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then apply any trailing column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            # a bare identifier in column position is a column reference
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Repeatedly apply postfix column operators to `this`: brackets, ``::`` casts,
        dotted qualification, and the dialect operators in COLUMN_OPERATORS."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # postgres-style cast: <expr>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # operator with a literal operand: consume the next token as a
                # number or string literal
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # shift the already-parsed qualifiers one level up:
                # table -> db, db -> catalog, new field becomes the column
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (via PRIMARY_PARSERS), a leading-dot
        decimal, or a parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # adjacent string literals are implicitly concatenated: 'a' 'b' -> CONCAT
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # leading-dot decimal literal, e.g. ".25" -> 0.25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                # (SELECT ...) possibly followed by UNION/INTERSECT/EXCEPT
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier,
        in that order of preference."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: if True, always build an exp.Anonymous instead of a typed node.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # no "(" follows, so this can only be a paren-less builtin
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            # functions with special argument grammar (CAST, EXTRACT, TRIM, ...)
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...), ANY(WITH ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # a function call may be followed by an OVER (...) window
        return self._parse_window(this)
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a user-defined-function signature."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly-qualified UDF name and, if present, its wrapped
        parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            # name only, no signature
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. ``_utf8'abc'``); falls back to a plain
        identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as ``kind.name``."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (``(x, y) -> ...`` or ``x -> ...``); if no lambda arrow is
        found, backtrack and parse a DISTINCT list or a regular expression instead."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # not a parenthesized parameter list after all
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # aggregate-style modifiers that may trail a function argument
        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a schema: either a nested SELECT (returned as-is via `this`) or a
        parenthesized list of constraints / column definitions."""
        index = self._index

        if not self.errors:
            # speculative parse: a "(" here may actually open a subquery
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type followed by zero or more
        column constraints. Returns `this` unchanged when neither is present."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            # both values given -> treat as a generated identity
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped value list or a
        single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS [IDENTITY] with its optional
        (START WITH ... INCREMENT BY ... MINVALUE ... MAXVALUE ... [NO] CYCLE) options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expression>)
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
self._match_text_seq("MAXVALUE"): 3470 this.set("maxvalue", self._parse_bitwise()) 3471 3472 if self._match_text_seq("CYCLE"): 3473 this.set("cycle", True) 3474 elif self._match_text_seq("NO", "CYCLE"): 3475 this.set("cycle", False) 3476 3477 if not identity: 3478 this.set("expression", self._parse_bitwise()) 3479 3480 self._match_r_paren() 3481 3482 return this 3483 3484 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3485 self._match_text_seq("LENGTH") 3486 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3487 3488 def _parse_not_constraint( 3489 self, 3490 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3491 if self._match_text_seq("NULL"): 3492 return self.expression(exp.NotNullColumnConstraint) 3493 if self._match_text_seq("CASESPECIFIC"): 3494 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3495 return None 3496 3497 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3498 if self._match(TokenType.CONSTRAINT): 3499 this = self._parse_id_var() 3500 else: 3501 this = None 3502 3503 if self._match_texts(self.CONSTRAINT_PARSERS): 3504 return self.expression( 3505 exp.ColumnConstraint, 3506 this=this, 3507 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3508 ) 3509 3510 return this 3511 3512 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3513 if not self._match(TokenType.CONSTRAINT): 3514 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3515 3516 this = self._parse_id_var() 3517 expressions = [] 3518 3519 while True: 3520 constraint = self._parse_unnamed_constraint() or self._parse_function() 3521 if not constraint: 3522 break 3523 expressions.append(constraint) 3524 3525 return self.expression(exp.Constraint, this=this, expressions=expressions) 3526 3527 def _parse_unnamed_constraint( 3528 self, constraints: t.Optional[t.Collection[str]] = None 3529 ) -> t.Optional[exp.Expression]: 3530 if 
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional column-list schema."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # the event, e.g. DELETE/UPDATE

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table, optional column list, and
        key-constraint options.

        Args:
            match: if True, require (and consume) the REFERENCES keyword.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()  # "delete" or "update"

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # e.g. CASCADE / RESTRICT: take the next token verbatim
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint (no column list) or a
        table constraint with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``[...]`` / ``{...}`` after `this`: a slice, an array literal, a
        struct literal, or a subscript, recursing for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # leading-colon slice: [:x]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # subscript access: normalize indices by the dialect's INDEX_OFFSET
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a Slice when a ``:`` follows (e.g. ``[a:b]``)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression,
        then any trailing window clause."""
        ifs = []
        default = None

        # the optional operand of a "simple" CASE; None for a searched CASE
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: function-style ``IF(cond, t, f)`` and
        statement-style ``IF cond THEN t [ELSE f] END``."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # not an IF expression after all; rewind past the IF keyword
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
    def _parse_extract(self) -> exp.Extract:
        """Parse the arguments of EXTRACT: ``<part> FROM <expr>`` or ``<part>, <expr>``."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST/TRY_CAST: ``<expr> AS <type>`` with optional
        CHARACTER SET or FORMAT clauses; a temporal FORMAT is rewritten into
        StrToDate / StrToTime.

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. Snowflake's CAST(expr, 'type string')
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # translate the dialect's format string into the canonical one
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
3754 if self._match(TokenType.CHARACTER_SET): 3755 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3756 elif self._match(TokenType.FORMAT): 3757 fmt_string = self._parse_string() 3758 fmt = self._parse_at_time_zone(fmt_string) 3759 3760 if to.this in exp.DataType.TEMPORAL_TYPES: 3761 this = self.expression( 3762 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3763 this=this, 3764 format=exp.Literal.string( 3765 format_time( 3766 fmt_string.this if fmt_string else "", 3767 self.FORMAT_MAPPING or self.TIME_MAPPING, 3768 self.FORMAT_TRIE or self.TIME_TRIE, 3769 ) 3770 ), 3771 ) 3772 3773 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3774 this.set("zone", fmt.args["zone"]) 3775 3776 return this 3777 3778 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3779 3780 def _parse_concat(self) -> t.Optional[exp.Expression]: 3781 args = self._parse_csv(self._parse_conjunction) 3782 if self.CONCAT_NULL_OUTPUTS_STRING: 3783 args = [ 3784 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3785 for arg in args 3786 if arg 3787 ] 3788 3789 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3790 # we find such a call we replace it with its argument. 
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG arguments, normalizing the Postgres and
        WITHIN GROUP forms into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3820 if not self._match_text_seq("WITHIN", "GROUP"): 3821 self._retreat(index) 3822 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3823 3824 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3825 order = self._parse_order(this=seq_get(args, 0)) 3826 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3827 3828 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3829 this = self._parse_bitwise() 3830 3831 if self._match(TokenType.USING): 3832 to: t.Optional[exp.Expression] = self.expression( 3833 exp.CharacterSet, this=self._parse_var() 3834 ) 3835 elif self._match(TokenType.COMMA): 3836 to = self._parse_types() 3837 else: 3838 to = None 3839 3840 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3841 3842 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3843 """ 3844 There are generally two variants of the DECODE function: 3845 3846 - DECODE(bin, charset) 3847 - DECODE(expression, search, result [, search, result] ... [, default]) 3848 3849 The second variant will always be parsed into a CASE expression. Note that NULL 3850 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3851 instead of relying on pattern matching. 
3852 """ 3853 args = self._parse_csv(self._parse_conjunction) 3854 3855 if len(args) < 3: 3856 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3857 3858 expression, *expressions = args 3859 if not expression: 3860 return None 3861 3862 ifs = [] 3863 for search, result in zip(expressions[::2], expressions[1::2]): 3864 if not search or not result: 3865 return None 3866 3867 if isinstance(search, exp.Literal): 3868 ifs.append( 3869 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3870 ) 3871 elif isinstance(search, exp.Null): 3872 ifs.append( 3873 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3874 ) 3875 else: 3876 cond = exp.or_( 3877 exp.EQ(this=expression.copy(), expression=search), 3878 exp.and_( 3879 exp.Is(this=expression.copy(), expression=exp.Null()), 3880 exp.Is(this=search.copy(), expression=exp.Null()), 3881 copy=False, 3882 ), 3883 copy=False, 3884 ) 3885 ifs.append(exp.If(this=cond, true=result)) 3886 3887 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3888 3889 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3890 self._match_text_seq("KEY") 3891 key = self._parse_field() 3892 self._match(TokenType.COLON) 3893 self._match_text_seq("VALUE") 3894 value = self._parse_field() 3895 3896 if not key and not value: 3897 return None 3898 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3899 3900 def _parse_json_object(self) -> exp.JSONObject: 3901 star = self._parse_star() 3902 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3903 3904 null_handling = None 3905 if self._match_text_seq("NULL", "ON", "NULL"): 3906 null_handling = "NULL ON NULL" 3907 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3908 null_handling = "ABSENT ON NULL" 3909 3910 unique_keys = None 3911 if self._match_text_seq("WITH", "UNIQUE"): 3912 unique_keys = True 3913 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3914 unique_keys = False 3915 3916 self._match_text_seq("KEYS") 3917 3918 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3919 format_json = self._match_text_seq("FORMAT", "JSON") 3920 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3921 3922 return self.expression( 3923 exp.JSONObject, 3924 expressions=expressions, 3925 null_handling=null_handling, 3926 unique_keys=unique_keys, 3927 return_type=return_type, 3928 format_json=format_json, 3929 encoding=encoding, 3930 ) 3931 3932 def _parse_logarithm(self) -> exp.Func: 3933 # Default argument order is base, expression 3934 args = self._parse_csv(self._parse_range) 3935 3936 if len(args) > 1: 3937 if not self.LOG_BASE_FIRST: 3938 args.reverse() 3939 return exp.Log.from_arg_list(args) 3940 3941 return self.expression( 3942 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3943 ) 3944 3945 def _parse_match_against(self) -> exp.MatchAgainst: 3946 expressions = self._parse_csv(self._parse_column) 3947 3948 self._match_text_seq(")", "AGAINST", "(") 3949 3950 this = self._parse_string() 3951 3952 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3953 modifier = "IN NATURAL LANGUAGE MODE" 3954 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3955 modifier = f"{modifier} WITH QUERY EXPANSION" 3956 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3957 modifier = "IN BOOLEAN MODE" 3958 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3959 modifier = "WITH QUERY EXPANSION" 3960 else: 3961 modifier = None 3962 3963 return self.expression( 3964 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3965 ) 3966 3967 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3968 def _parse_open_json(self) -> exp.OpenJSON: 3969 this = self._parse_bitwise() 3970 path = self._match(TokenType.COMMA) and self._parse_string() 3971 3972 def 
    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION / LOCATE arguments in either the ``needle IN haystack``
        or the comma-separated form.

        Args:
            haystack_first: if True, the comma form is (haystack, needle).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node named `func_name`."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): swap so `this` is the string being trimmed
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4028 4029 position = None 4030 collation = None 4031 4032 if self._match_texts(self.TRIM_TYPES): 4033 position = self._prev.text.upper() 4034 4035 expression = self._parse_bitwise() 4036 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4037 this = self._parse_bitwise() 4038 else: 4039 this = expression 4040 expression = None 4041 4042 if self._match(TokenType.COLLATE): 4043 collation = self._parse_bitwise() 4044 4045 return self.expression( 4046 exp.Trim, this=this, position=position, expression=expression, collation=collation 4047 ) 4048 4049 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4050 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4051 4052 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4053 return self._parse_window(self._parse_id_var(), alias=True) 4054 4055 def _parse_respect_or_ignore_nulls( 4056 self, this: t.Optional[exp.Expression] 4057 ) -> t.Optional[exp.Expression]: 4058 if self._match_text_seq("IGNORE", "NULLS"): 4059 return self.expression(exp.IgnoreNulls, this=this) 4060 if self._match_text_seq("RESPECT", "NULLS"): 4061 return self.expression(exp.RespectNulls, this=this) 4062 return this 4063 4064 def _parse_window( 4065 self, this: t.Optional[exp.Expression], alias: bool = False 4066 ) -> t.Optional[exp.Expression]: 4067 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4068 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4069 self._match_r_paren() 4070 4071 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4072 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4073 if self._match_text_seq("WITHIN", "GROUP"): 4074 order = self._parse_wrapped(self._parse_order) 4075 this = self.expression(exp.WithinGroup, this=this, expression=order) 4076 4077 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4078 # Some dialects choose to implement and some do not. 4079 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4080 4081 # There is some code above in _parse_lambda that handles 4082 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4083 4084 # The below changes handle 4085 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4086 4087 # Oracle allows both formats 4088 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4089 # and Snowflake chose to do the same for familiarity 4090 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4091 this = self._parse_respect_or_ignore_nulls(this) 4092 4093 # bigquery select from window x AS (partition by ...) 
4094 if alias: 4095 over = None 4096 self._match(TokenType.ALIAS) 4097 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4098 return this 4099 else: 4100 over = self._prev.text.upper() 4101 4102 if not self._match(TokenType.L_PAREN): 4103 return self.expression( 4104 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4105 ) 4106 4107 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4108 4109 first = self._match(TokenType.FIRST) 4110 if self._match_text_seq("LAST"): 4111 first = False 4112 4113 partition = self._parse_partition_by() 4114 order = self._parse_order() 4115 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4116 4117 if kind: 4118 self._match(TokenType.BETWEEN) 4119 start = self._parse_window_spec() 4120 self._match(TokenType.AND) 4121 end = self._parse_window_spec() 4122 4123 spec = self.expression( 4124 exp.WindowSpec, 4125 kind=kind, 4126 start=start["value"], 4127 start_side=start["side"], 4128 end=end["value"], 4129 end_side=end["side"], 4130 ) 4131 else: 4132 spec = None 4133 4134 self._match_r_paren() 4135 4136 return self.expression( 4137 exp.Window, 4138 this=this, 4139 partition_by=partition, 4140 order=order, 4141 spec=spec, 4142 alias=window_alias, 4143 over=over, 4144 first=first, 4145 ) 4146 4147 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4148 self._match(TokenType.BETWEEN) 4149 4150 return { 4151 "value": ( 4152 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4153 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4154 or self._parse_bitwise() 4155 ), 4156 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4157 } 4158 4159 def _parse_alias( 4160 self, this: t.Optional[exp.Expression], explicit: bool = False 4161 ) -> t.Optional[exp.Expression]: 4162 any_token = self._match(TokenType.ALIAS) 4163 4164 if explicit and not any_token: 4165 return this 4166 4167 if 
self._match(TokenType.L_PAREN): 4168 aliases = self.expression( 4169 exp.Aliases, 4170 this=this, 4171 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4172 ) 4173 self._match_r_paren(aliases) 4174 return aliases 4175 4176 alias = self._parse_id_var(any_token) 4177 4178 if alias: 4179 return self.expression(exp.Alias, this=this, alias=alias) 4180 4181 return this 4182 4183 def _parse_id_var( 4184 self, 4185 any_token: bool = True, 4186 tokens: t.Optional[t.Collection[TokenType]] = None, 4187 ) -> t.Optional[exp.Expression]: 4188 identifier = self._parse_identifier() 4189 4190 if identifier: 4191 return identifier 4192 4193 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4194 quoted = self._prev.token_type == TokenType.STRING 4195 return exp.Identifier(this=self._prev.text, quoted=quoted) 4196 4197 return None 4198 4199 def _parse_string(self) -> t.Optional[exp.Expression]: 4200 if self._match(TokenType.STRING): 4201 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4202 return self._parse_placeholder() 4203 4204 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4205 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4206 4207 def _parse_number(self) -> t.Optional[exp.Expression]: 4208 if self._match(TokenType.NUMBER): 4209 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4210 return self._parse_placeholder() 4211 4212 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4213 if self._match(TokenType.IDENTIFIER): 4214 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4215 return self._parse_placeholder() 4216 4217 def _parse_var( 4218 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4219 ) -> t.Optional[exp.Expression]: 4220 if ( 4221 (any_token and self._advance_any()) 4222 or self._match(TokenType.VAR) 4223 or (self._match_set(tokens) if tokens else False) 
4224 ): 4225 return self.expression(exp.Var, this=self._prev.text) 4226 return self._parse_placeholder() 4227 4228 def _advance_any(self) -> t.Optional[Token]: 4229 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4230 self._advance() 4231 return self._prev 4232 return None 4233 4234 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4235 return self._parse_var() or self._parse_string() 4236 4237 def _parse_null(self) -> t.Optional[exp.Expression]: 4238 if self._match(TokenType.NULL): 4239 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4240 return None 4241 4242 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4243 if self._match(TokenType.TRUE): 4244 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4245 if self._match(TokenType.FALSE): 4246 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4247 return None 4248 4249 def _parse_star(self) -> t.Optional[exp.Expression]: 4250 if self._match(TokenType.STAR): 4251 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4252 return None 4253 4254 def _parse_parameter(self) -> exp.Parameter: 4255 wrapped = self._match(TokenType.L_BRACE) 4256 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4257 self._match(TokenType.R_BRACE) 4258 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4259 4260 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4261 if self._match_set(self.PLACEHOLDER_PARSERS): 4262 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4263 if placeholder: 4264 return placeholder 4265 self._advance(-1) 4266 return None 4267 4268 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4269 if not self._match(TokenType.EXCEPT): 4270 return None 4271 if self._match(TokenType.L_PAREN, advance=False): 4272 return self._parse_wrapped_csv(self._parse_column) 4273 return self._parse_csv(self._parse_column) 4274 4275 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4276 if not self._match(TokenType.REPLACE): 4277 return None 4278 if self._match(TokenType.L_PAREN, advance=False): 4279 return self._parse_wrapped_csv(self._parse_expression) 4280 return self._parse_expressions() 4281 4282 def _parse_csv( 4283 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4284 ) -> t.List[t.Optional[exp.Expression]]: 4285 parse_result = parse_method() 4286 items = [parse_result] if parse_result is not None else [] 4287 4288 while self._match(sep): 4289 self._add_comments(parse_result) 4290 parse_result = parse_method() 4291 if parse_result is not None: 4292 items.append(parse_result) 4293 4294 return items 4295 4296 def _parse_tokens( 4297 self, parse_method: t.Callable, expressions: t.Dict 4298 ) -> t.Optional[exp.Expression]: 4299 this = parse_method() 4300 4301 while self._match_set(expressions): 4302 this = self.expression( 4303 expressions[self._prev.token_type], 4304 this=this, 4305 comments=self._prev_comments, 4306 expression=parse_method(), 4307 ) 4308 4309 return this 4310 4311 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4312 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4313 4314 def _parse_wrapped_csv( 4315 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4316 ) -> t.List[t.Optional[exp.Expression]]: 4317 return self._parse_wrapped( 4318 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4319 ) 4320 4321 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4322 wrapped = self._match(TokenType.L_PAREN) 4323 if not wrapped and not optional: 4324 self.raise_error("Expecting (") 4325 parse_result = parse_method() 4326 if wrapped: 4327 self._match_r_paren() 4328 return parse_result 4329 4330 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4331 return self._parse_csv(self._parse_expression) 4332 
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a SELECT statement or a (possibly aliased) scalar expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may consist of several VAR tokens (e.g. "READ ONLY").
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): for ROLLBACK the parsed `chain` value is discarded — presumably
        # intentional since exp.Rollback does not model it; confirm upstream.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse one DROP COLUMN action, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a DROP PARTITION action with one or more partition specs."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... ADD as either constraint additions or column additions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP|SET} DEFAULT / SET DATA TYPE."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP as either partition drops or column drops."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement, falling back to a raw Command if unsupported."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable if the action parser consumed all tokens;
            # otherwise fall back to an opaque Command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None when absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers, or a generic Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one ``name = value`` / ``name TO value`` item of a SET statement."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the registered SET parsers, or a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement, falling back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the (possibly multi-word) `options` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap them as an opaque Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: NAME(KIND(key value ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range property: NAME(MIN x MAX y) or NAME(MAX y)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # Without an explicit MIN, the range implicitly starts at 0.
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming tokens against `trie` and return the mapped parser.

        Rewinds to the starting index when no (possibly multi-word) key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True on a token-type match (consuming it unless advance=False),
        # None otherwise — callers rely on truthiness, not on a strict bool.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; consumes both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening paren, raising a parse error if it is missing.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing paren, raising a parse error if it is missing.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitively match the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitively match a sequence of token texts; rewinds on failure
        # (and also when advance=False, so the match is a pure lookahead).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes into Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in `node` that reference lambda parameters.

        Columns whose leading part names a lambda variable are replaced by a plain
        identifier (or a Dot chain when qualified), so they are no longer treated
        as table columns.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any; the while/else
                # falls through to replacing the column itself when there is none.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """
        Args:
            error_level: The desired error level. Defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context shown in
                error messages. Defaults to 100.
            max_errors: Maximum number of errors included in a raised ParseError
                (only relevant for ErrorLevel.RAISE). Defaults to 3.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        # Initialize the per-parse mutable state (token buffer, errors, indices).
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        # Pass the unbound method so _parse drives statement parsing itself.
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 )
Logs or raises any found errors, depending on the chosen error level setting.
964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.