# Module: sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: 
self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN 
KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 741 TokenType.WHERE: lambda self: ("where", self._parse_where()), 742 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 743 TokenType.HAVING: lambda self: ("having", self._parse_having()), 744 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 745 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 746 
TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 747 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 748 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 749 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 750 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 751 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 752 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 753 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.CLUSTER_BY: lambda self: ( 755 "cluster", 756 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 757 ), 758 TokenType.DISTRIBUTE_BY: lambda self: ( 759 "distribute", 760 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 761 ), 762 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 763 } 764 765 SET_PARSERS = { 766 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 767 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 768 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 769 "TRANSACTION": lambda self: self._parse_set_transaction(), 770 } 771 772 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 773 774 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 775 776 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 777 778 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 779 780 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 781 782 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 783 TRANSACTION_CHARACTERISTICS = { 784 "ISOLATION LEVEL REPEATABLE READ", 785 "ISOLATION LEVEL READ COMMITTED", 786 "ISOLATION LEVEL READ UNCOMMITTED", 787 "ISOLATION LEVEL SERIALIZABLE", 788 "READ WRITE", 789 "READ ONLY", 790 } 791 792 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 793 794 CLONE_KINDS = 
{"TIMESTAMP", "OFFSET", "STATEMENT"} 795 796 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 797 798 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 799 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 800 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 801 802 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 803 804 STRICT_CAST = True 805 806 # A NULL arg in CONCAT yields NULL by default 807 CONCAT_NULL_OUTPUTS_STRING = False 808 809 PREFIXED_PIVOT_COLUMNS = False 810 IDENTIFY_PIVOT_STRINGS = False 811 812 LOG_BASE_FIRST = True 813 LOG_DEFAULTS_TO_LN = False 814 815 __slots__ = ( 816 "error_level", 817 "error_message_context", 818 "max_errors", 819 "sql", 820 "errors", 821 "_tokens", 822 "_index", 823 "_curr", 824 "_next", 825 "_prev", 826 "_prev_comments", 827 ) 828 829 # Autofilled 830 INDEX_OFFSET: int = 0 831 UNNEST_COLUMN_ONLY: bool = False 832 ALIAS_POST_TABLESAMPLE: bool = False 833 STRICT_STRING_CONCAT = False 834 NULL_ORDERING: str = "nulls_are_small" 835 SHOW_TRIE: t.Dict = {} 836 SET_TRIE: t.Dict = {} 837 FORMAT_MAPPING: t.Dict[str, str] = {} 838 FORMAT_TRIE: t.Dict = {} 839 TIME_MAPPING: t.Dict[str, str] = {} 840 TIME_TRIE: t.Dict = {} 841 842 def __init__( 843 self, 844 error_level: t.Optional[ErrorLevel] = None, 845 error_message_context: int = 100, 846 max_errors: int = 3, 847 ): 848 self.error_level = error_level or ErrorLevel.IMMEDIATE 849 self.error_message_context = error_message_context 850 self.max_errors = max_errors 851 self.reset() 852 853 def reset(self): 854 self.sql = "" 855 self.errors = [] 856 self._tokens = [] 857 self._index = 0 858 self._curr = None 859 self._next = None 860 self._prev = None 861 self._prev_comments = None 862 863 def parse( 864 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 865 ) -> t.List[t.Optional[exp.Expression]]: 866 """ 867 Parses a list of tokens and returns a list of syntax trees, one tree 868 per parsed SQL statement. 
869 870 Args: 871 raw_tokens: The list of tokens. 872 sql: The original SQL string, used to produce helpful debug messages. 873 874 Returns: 875 The list of the produced syntax trees. 876 """ 877 return self._parse( 878 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 879 ) 880 881 def parse_into( 882 self, 883 expression_types: exp.IntoType, 884 raw_tokens: t.List[Token], 885 sql: t.Optional[str] = None, 886 ) -> t.List[t.Optional[exp.Expression]]: 887 """ 888 Parses a list of tokens into a given Expression type. If a collection of Expression 889 types is given instead, this method will try to parse the token list into each one 890 of them, stopping at the first for which the parsing succeeds. 891 892 Args: 893 expression_types: The expression type(s) to try and parse the token list into. 894 raw_tokens: The list of tokens. 895 sql: The original SQL string, used to produce helpful debug messages. 896 897 Returns: 898 The target Expression. 899 """ 900 errors = [] 901 for expression_type in ensure_list(expression_types): 902 parser = self.EXPRESSION_PARSERS.get(expression_type) 903 if not parser: 904 raise TypeError(f"No parser registered for {expression_type}") 905 906 try: 907 return self._parse(parser, raw_tokens, sql) 908 except ParseError as e: 909 e.errors[0]["into_expression"] = expression_type 910 errors.append(e) 911 912 raise ParseError( 913 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 914 errors=merge_errors(errors), 915 ) from errors[-1] 916 917 def _parse( 918 self, 919 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 920 raw_tokens: t.List[Token], 921 sql: t.Optional[str] = None, 922 ) -> t.List[t.Optional[exp.Expression]]: 923 self.reset() 924 self.sql = sql or "" 925 926 total = len(raw_tokens) 927 chunks: t.List[t.List[Token]] = [[]] 928 929 for i, token in enumerate(raw_tokens): 930 if token.token_type == TokenType.SEMICOLON: 931 if i < total - 1: 932 chunks.append([]) 933 else: 934 
chunks[-1].append(token) 935 936 expressions = [] 937 938 for tokens in chunks: 939 self._index = -1 940 self._tokens = tokens 941 self._advance() 942 943 expressions.append(parse_method(self)) 944 945 if self._index < len(self._tokens): 946 self.raise_error("Invalid expression / Unexpected token") 947 948 self.check_errors() 949 950 return expressions 951 952 def check_errors(self) -> None: 953 """Logs or raises any found errors, depending on the chosen error level setting.""" 954 if self.error_level == ErrorLevel.WARN: 955 for error in self.errors: 956 logger.error(str(error)) 957 elif self.error_level == ErrorLevel.RAISE and self.errors: 958 raise ParseError( 959 concat_messages(self.errors, self.max_errors), 960 errors=merge_errors(self.errors), 961 ) 962 963 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 964 """ 965 Appends an error in the list of recorded errors or raises it, depending on the chosen 966 error level setting. 967 """ 968 token = token or self._curr or self._prev or Token.string("") 969 start = token.start 970 end = token.end + 1 971 start_context = self.sql[max(start - self.error_message_context, 0) : start] 972 highlight = self.sql[start:end] 973 end_context = self.sql[end : end + self.error_message_context] 974 975 error = ParseError.new( 976 f"{message}. Line {token.line}, Col: {token.col}.\n" 977 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 978 description=message, 979 line=token.line, 980 col=token.col, 981 start_context=start_context, 982 highlight=highlight, 983 end_context=end_context, 984 ) 985 986 if self.error_level == ErrorLevel.IMMEDIATE: 987 raise error 988 989 self.errors.append(error) 990 991 def expression( 992 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 993 ) -> E: 994 """ 995 Creates a new, validated Expression. 996 997 Args: 998 exp_class: The expression class to instantiate. 999 comments: An optional list of comments to attach to the expression. 
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Prefer explicitly passed comments; otherwise attach any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move any comments buffered from the previous token onto the expression.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanning both tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor and refreshes the cached current/next/previous tokens."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Reposition the cursor to an absolute index (no-op if already there).
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the matched keyword and the rest of the input as a raw command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if
allow_exists else None

        self._match(TokenType.ON)

        # COMMENT ON <creatable kind> <name> IS '<string>'
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # TO <table> property (e.g. ClickHouse materialized views).
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry: <expr> [DELETE | RECOMPRESS <expr> | TO DISK 'x' | TO VOLUME 'x']
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # Optional `GROUP BY ... SET <aggregates>` tail.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates =
self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, dispatching on the first token."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a recognized statement keyword: try a bare expression, then a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: fall back to a raw command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...) -- skip the TABLE keyword.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
self._advance() 1174 1175 properties = None 1176 create_token = self._match_set(self.CREATABLES) and self._prev 1177 1178 if not create_token: 1179 # exp.Properties.Location.POST_CREATE 1180 properties = self._parse_properties() 1181 create_token = self._match_set(self.CREATABLES) and self._prev 1182 1183 if not properties or not create_token: 1184 return self._parse_as_command(start) 1185 1186 exists = self._parse_exists(not_=True) 1187 this = None 1188 expression = None 1189 indexes = None 1190 no_schema_binding = None 1191 begin = None 1192 clone = None 1193 1194 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1195 nonlocal properties 1196 if properties and temp_props: 1197 properties.expressions.extend(temp_props.expressions) 1198 elif temp_props: 1199 properties = temp_props 1200 1201 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1202 this = self._parse_user_defined_function(kind=create_token.token_type) 1203 1204 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1205 extend_props(self._parse_properties()) 1206 1207 self._match(TokenType.ALIAS) 1208 begin = self._match(TokenType.BEGIN) 1209 return_ = self._match_text_seq("RETURN") 1210 expression = self._parse_statement() 1211 1212 if return_: 1213 expression = self.expression(exp.Return, this=expression) 1214 elif create_token.token_type == TokenType.INDEX: 1215 this = self._parse_index(index=self._parse_id_var()) 1216 elif create_token.token_type in self.DB_CREATABLES: 1217 table_parts = self._parse_table_parts(schema=True) 1218 1219 # exp.Properties.Location.POST_NAME 1220 self._match(TokenType.COMMA) 1221 extend_props(self._parse_properties(before=True)) 1222 1223 this = self._parse_schema(this=table_parts) 1224 1225 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1226 extend_props(self._parse_properties()) 1227 1228 self._match(TokenType.ALIAS) 1229 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1230 # 
exp.Properties.Location.POST_ALIAS 1231 extend_props(self._parse_properties()) 1232 1233 expression = self._parse_ddl_select() 1234 1235 if create_token.token_type == TokenType.TABLE: 1236 indexes = [] 1237 while True: 1238 index = self._parse_index() 1239 1240 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1241 extend_props(self._parse_properties()) 1242 1243 if not index: 1244 break 1245 else: 1246 self._match(TokenType.COMMA) 1247 indexes.append(index) 1248 elif create_token.token_type == TokenType.VIEW: 1249 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1250 no_schema_binding = True 1251 1252 if self._match_text_seq("CLONE"): 1253 clone = self._parse_table(schema=True) 1254 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1255 clone_kind = ( 1256 self._match(TokenType.L_PAREN) 1257 and self._match_texts(self.CLONE_KINDS) 1258 and self._prev.text.upper() 1259 ) 1260 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1261 self._match(TokenType.R_PAREN) 1262 clone = self.expression( 1263 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1264 ) 1265 1266 return self.expression( 1267 exp.Create, 1268 this=this, 1269 kind=create_token.text, 1270 replace=replace, 1271 unique=unique, 1272 expression=expression, 1273 exists=exists, 1274 properties=properties, 1275 indexes=indexes, 1276 no_schema_binding=no_schema_binding, 1277 begin=begin, 1278 clone=clone, 1279 ) 1280 1281 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1282 # only used for teradata currently 1283 self._match(TokenType.COMMA) 1284 1285 kwargs = { 1286 "no": self._match_text_seq("NO"), 1287 "dual": self._match_text_seq("DUAL"), 1288 "before": self._match_text_seq("BEFORE"), 1289 "default": self._match_text_seq("DEFAULT"), 1290 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1291 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1292 "after": self._match_text_seq("AFTER"), 1293 
"minimum": self._match_texts(("MIN", "MINIMUM")), 1294 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1295 } 1296 1297 if self._match_texts(self.PROPERTY_PARSERS): 1298 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1299 try: 1300 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1301 except TypeError: 1302 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1303 1304 return None 1305 1306 def _parse_property(self) -> t.Optional[exp.Expression]: 1307 if self._match_texts(self.PROPERTY_PARSERS): 1308 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1309 1310 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1311 return self._parse_character_set(default=True) 1312 1313 if self._match_text_seq("COMPOUND", "SORTKEY"): 1314 return self._parse_sortkey(compound=True) 1315 1316 if self._match_text_seq("SQL", "SECURITY"): 1317 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1318 1319 assignment = self._match_pair( 1320 TokenType.VAR, TokenType.EQ, advance=False 1321 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1322 1323 if assignment: 1324 key = self._parse_var_or_string() 1325 self._match(TokenType.EQ) 1326 return self.expression(exp.Property, this=key, value=self._parse_column()) 1327 1328 return None 1329 1330 def _parse_stored(self) -> exp.FileFormatProperty: 1331 self._match(TokenType.ALIAS) 1332 1333 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1334 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1335 1336 return self.expression( 1337 exp.FileFormatProperty, 1338 this=self.expression( 1339 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1340 ) 1341 if input_format or output_format 1342 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1343 ) 1344 1345 def _parse_property_assignment(self, exp_class: 
t.Type[E]) -> E: 1346 self._match(TokenType.EQ) 1347 self._match(TokenType.ALIAS) 1348 return self.expression(exp_class, this=self._parse_field()) 1349 1350 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1351 properties = [] 1352 while True: 1353 if before: 1354 prop = self._parse_property_before() 1355 else: 1356 prop = self._parse_property() 1357 1358 if not prop: 1359 break 1360 for p in ensure_list(prop): 1361 properties.append(p) 1362 1363 if properties: 1364 return self.expression(exp.Properties, expressions=properties) 1365 1366 return None 1367 1368 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1369 return self.expression( 1370 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1371 ) 1372 1373 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1374 if self._index >= 2: 1375 pre_volatile_token = self._tokens[self._index - 2] 1376 else: 1377 pre_volatile_token = None 1378 1379 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1380 return exp.VolatileProperty() 1381 1382 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1383 1384 def _parse_with_property( 1385 self, 1386 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1387 self._match(TokenType.WITH) 1388 if self._match(TokenType.L_PAREN, advance=False): 1389 return self._parse_wrapped_csv(self._parse_property) 1390 1391 if self._match_text_seq("JOURNAL"): 1392 return self._parse_withjournaltable() 1393 1394 if self._match_text_seq("DATA"): 1395 return self._parse_withdata(no=False) 1396 elif self._match_text_seq("NO", "DATA"): 1397 return self._parse_withdata(no=True) 1398 1399 if not self._next: 1400 return None 1401 1402 return self._parse_withisolatedloading() 1403 1404 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1405 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 
        self._match(TokenType.EQ)

        # DEFINER = user@host
        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL TABLE = <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # CHECKSUM = ON | OFF | DEFAULT; None means unspecified.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Hive-style: CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if
not self._match_text_seq("GRANTS"): 1469 self._retreat(self._index - 1) 1470 return None 1471 1472 return self.expression(exp.CopyGrantsProperty) 1473 1474 def _parse_freespace(self) -> exp.FreespaceProperty: 1475 self._match(TokenType.EQ) 1476 return self.expression( 1477 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1478 ) 1479 1480 def _parse_mergeblockratio( 1481 self, no: bool = False, default: bool = False 1482 ) -> exp.MergeBlockRatioProperty: 1483 if self._match(TokenType.EQ): 1484 return self.expression( 1485 exp.MergeBlockRatioProperty, 1486 this=self._parse_number(), 1487 percent=self._match(TokenType.PERCENT), 1488 ) 1489 1490 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1491 1492 def _parse_datablocksize( 1493 self, 1494 default: t.Optional[bool] = None, 1495 minimum: t.Optional[bool] = None, 1496 maximum: t.Optional[bool] = None, 1497 ) -> exp.DataBlocksizeProperty: 1498 self._match(TokenType.EQ) 1499 size = self._parse_number() 1500 1501 units = None 1502 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1503 units = self._prev.text 1504 1505 return self.expression( 1506 exp.DataBlocksizeProperty, 1507 size=size, 1508 units=units, 1509 default=default, 1510 minimum=minimum, 1511 maximum=maximum, 1512 ) 1513 1514 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1515 self._match(TokenType.EQ) 1516 always = self._match_text_seq("ALWAYS") 1517 manual = self._match_text_seq("MANUAL") 1518 never = self._match_text_seq("NEVER") 1519 default = self._match_text_seq("DEFAULT") 1520 1521 autotemp = None 1522 if self._match_text_seq("AUTOTEMP"): 1523 autotemp = self._parse_schema() 1524 1525 return self.expression( 1526 exp.BlockCompressionProperty, 1527 always=always, 1528 manual=manual, 1529 never=never, 1530 default=default, 1531 autotemp=autotemp, 1532 ) 1533 1534 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1535 no = 
self._match_text_seq("NO") 1536 concurrent = self._match_text_seq("CONCURRENT") 1537 self._match_text_seq("ISOLATED", "LOADING") 1538 for_all = self._match_text_seq("FOR", "ALL") 1539 for_insert = self._match_text_seq("FOR", "INSERT") 1540 for_none = self._match_text_seq("FOR", "NONE") 1541 return self.expression( 1542 exp.IsolatedLoadingProperty, 1543 no=no, 1544 concurrent=concurrent, 1545 for_all=for_all, 1546 for_insert=for_insert, 1547 for_none=for_none, 1548 ) 1549 1550 def _parse_locking(self) -> exp.LockingProperty: 1551 if self._match(TokenType.TABLE): 1552 kind = "TABLE" 1553 elif self._match(TokenType.VIEW): 1554 kind = "VIEW" 1555 elif self._match(TokenType.ROW): 1556 kind = "ROW" 1557 elif self._match_text_seq("DATABASE"): 1558 kind = "DATABASE" 1559 else: 1560 kind = None 1561 1562 if kind in ("DATABASE", "TABLE", "VIEW"): 1563 this = self._parse_table_parts() 1564 else: 1565 this = None 1566 1567 if self._match(TokenType.FOR): 1568 for_or_in = "FOR" 1569 elif self._match(TokenType.IN): 1570 for_or_in = "IN" 1571 else: 1572 for_or_in = None 1573 1574 if self._match_text_seq("ACCESS"): 1575 lock_type = "ACCESS" 1576 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1577 lock_type = "EXCLUSIVE" 1578 elif self._match_text_seq("SHARE"): 1579 lock_type = "SHARE" 1580 elif self._match_text_seq("READ"): 1581 lock_type = "READ" 1582 elif self._match_text_seq("WRITE"): 1583 lock_type = "WRITE" 1584 elif self._match_text_seq("CHECKSUM"): 1585 lock_type = "CHECKSUM" 1586 else: 1587 lock_type = None 1588 1589 override = self._match_text_seq("OVERRIDE") 1590 1591 return self.expression( 1592 exp.LockingProperty, 1593 this=this, 1594 kind=kind, 1595 for_or_in=for_or_in, 1596 lock_type=lock_type, 1597 override=override, 1598 ) 1599 1600 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1601 if self._match(TokenType.PARTITION_BY): 1602 return self._parse_csv(self._parse_conjunction) 1603 return [] 1604 1605 def _parse_partitioned_by(self) -> 
exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]; None means STATISTICS unspecified.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE ROWS | ON COMMIT DELETE ROWS
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses `LIKE <table> [INCLUDING | EXCLUDING <option>]...` in a CREATE statement."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # A dangling INCLUDING/EXCLUDING makes the whole clause invalid.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: a scalar type, RETURNS TABLE (...) or RETURNS TABLE<...>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (angle-bracket struct syntax).
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. SQLite's INSERT OR REPLACE/IGNORE/... alternatives.
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword-argument order matters here — each value parses tokens in turn.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Postgres ON CONFLICT ... DO ... or MySQL ON DUPLICATE KEY UPDATE ...
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW FORMAT ... — the ROW token was already consumed by the caller.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses a Hive ROW FORMAT clause (SERDE '<class>' or DELIMITED ...)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] =
self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Hive: LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <t> ...
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        # Any other LOAD variant is kept as a raw command.
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )
    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET assignments, and the
        optional FROM / WHERE / RETURNING / LIMIT clauses."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse an UNCACHE TABLE statement; raises if TABLE does not follow."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse a CACHE [LAZY] TABLE statement with an optional OPTIONS block
        and an optional AS <select> expression."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # OPTIONS is parsed as a single ('key' = 'value') pair.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse a PARTITION(...) clause; returns None if the keyword is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row as a Tuple, with or without parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1896 # https://prestodb.io/docs/current/sql/values.html 1897 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1898 1899 def _parse_select( 1900 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1901 ) -> t.Optional[exp.Expression]: 1902 cte = self._parse_with() 1903 if cte: 1904 this = self._parse_statement() 1905 1906 if not this: 1907 self.raise_error("Failed to parse any statement following CTE") 1908 return cte 1909 1910 if "with" in this.arg_types: 1911 this.set("with", cte) 1912 else: 1913 self.raise_error(f"{this.key} does not support CTE") 1914 this = cte 1915 elif self._match(TokenType.SELECT): 1916 comments = self._prev_comments 1917 1918 hint = self._parse_hint() 1919 all_ = self._match(TokenType.ALL) 1920 distinct = self._match(TokenType.DISTINCT) 1921 1922 kind = ( 1923 self._match(TokenType.ALIAS) 1924 and self._match_texts(("STRUCT", "VALUE")) 1925 and self._prev.text 1926 ) 1927 1928 if distinct: 1929 distinct = self.expression( 1930 exp.Distinct, 1931 on=self._parse_value() if self._match(TokenType.ON) else None, 1932 ) 1933 1934 if all_ and distinct: 1935 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1936 1937 limit = self._parse_limit(top=True) 1938 expressions = self._parse_expressions() 1939 1940 this = self.expression( 1941 exp.Select, 1942 kind=kind, 1943 hint=hint, 1944 distinct=distinct, 1945 expressions=expressions, 1946 limit=limit, 1947 ) 1948 this.comments = comments 1949 1950 into = self._parse_into() 1951 if into: 1952 this.set("into", into) 1953 1954 from_ = self._parse_from() 1955 if from_: 1956 this.set("from", from_) 1957 1958 this = self._parse_query_modifiers(this) 1959 elif (table or nested) and self._match(TokenType.L_PAREN): 1960 if self._match(TokenType.PIVOT): 1961 this = self._parse_simplified_pivot() 1962 elif self._match(TokenType.FROM): 1963 this = exp.select("*").from_( 1964 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1965 ) 
1966 else: 1967 this = self._parse_table() if table else self._parse_select(nested=True) 1968 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1969 1970 self._match_r_paren() 1971 1972 # early return so that subquery unions aren't parsed again 1973 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1974 # Union ALL should be a property of the top select node, not the subquery 1975 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1976 elif self._match(TokenType.VALUES): 1977 this = self.expression( 1978 exp.Values, 1979 expressions=self._parse_csv(self._parse_value), 1980 alias=self._parse_table_alias(), 1981 ) 1982 else: 1983 this = None 1984 1985 return self._parse_set_operations(this) 1986 1987 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1988 if not skip_with_token and not self._match(TokenType.WITH): 1989 return None 1990 1991 comments = self._prev_comments 1992 recursive = self._match(TokenType.RECURSIVE) 1993 1994 expressions = [] 1995 while True: 1996 expressions.append(self._parse_cte()) 1997 1998 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1999 break 2000 else: 2001 self._match(TokenType.WITH) 2002 2003 return self.expression( 2004 exp.With, comments=comments, expressions=expressions, recursive=recursive 2005 ) 2006 2007 def _parse_cte(self) -> exp.CTE: 2008 alias = self._parse_table_alias() 2009 if not alias or not alias.this: 2010 self.raise_error("Expected CTE to have alias") 2011 2012 self._match(TokenType.ALIAS) 2013 return self.expression( 2014 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2015 ) 2016 2017 def _parse_table_alias( 2018 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2019 ) -> t.Optional[exp.TableAlias]: 2020 any_token = self._match(TokenType.ALIAS) 2021 alias = ( 2022 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2023 or self._parse_string_as_identifier() 2024 
) 2025 2026 index = self._index 2027 if self._match(TokenType.L_PAREN): 2028 columns = self._parse_csv(self._parse_function_parameter) 2029 self._match_r_paren() if columns else self._retreat(index) 2030 else: 2031 columns = None 2032 2033 if not alias and not columns: 2034 return None 2035 2036 return self.expression(exp.TableAlias, this=alias, columns=columns) 2037 2038 def _parse_subquery( 2039 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2040 ) -> t.Optional[exp.Subquery]: 2041 if not this: 2042 return None 2043 2044 return self.expression( 2045 exp.Subquery, 2046 this=this, 2047 pivots=self._parse_pivots(), 2048 alias=self._parse_table_alias() if parse_alias else None, 2049 ) 2050 2051 def _parse_query_modifiers( 2052 self, this: t.Optional[exp.Expression] 2053 ) -> t.Optional[exp.Expression]: 2054 if isinstance(this, self.MODIFIABLES): 2055 for join in iter(self._parse_join, None): 2056 this.append("joins", join) 2057 for lateral in iter(self._parse_lateral, None): 2058 this.append("laterals", lateral) 2059 2060 while True: 2061 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2062 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2063 key, expression = parser(self) 2064 2065 if expression: 2066 this.set(key, expression) 2067 if key == "limit": 2068 offset = expression.args.pop("offset", None) 2069 if offset: 2070 this.set("offset", exp.Offset(expression=offset)) 2071 continue 2072 break 2073 return this 2074 2075 def _parse_hint(self) -> t.Optional[exp.Hint]: 2076 if self._match(TokenType.HINT): 2077 hints = [] 2078 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2079 hints.extend(hint) 2080 2081 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2082 self.raise_error("Expected */ after HINT") 2083 2084 return self.expression(exp.Hint, expressions=hints) 2085 2086 return None 2087 2088 def _parse_into(self) -> t.Optional[exp.Into]: 2089 if not self._match(TokenType.INTO): 2090 return 
None 2091 2092 temp = self._match(TokenType.TEMPORARY) 2093 unlogged = self._match_text_seq("UNLOGGED") 2094 self._match(TokenType.TABLE) 2095 2096 return self.expression( 2097 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2098 ) 2099 2100 def _parse_from( 2101 self, joins: bool = False, skip_from_token: bool = False 2102 ) -> t.Optional[exp.From]: 2103 if not skip_from_token and not self._match(TokenType.FROM): 2104 return None 2105 2106 return self.expression( 2107 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2108 ) 2109 2110 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2111 if not self._match(TokenType.MATCH_RECOGNIZE): 2112 return None 2113 2114 self._match_l_paren() 2115 2116 partition = self._parse_partition_by() 2117 order = self._parse_order() 2118 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2119 2120 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2121 rows = exp.var("ONE ROW PER MATCH") 2122 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2123 text = "ALL ROWS PER MATCH" 2124 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2125 text += f" SHOW EMPTY MATCHES" 2126 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2127 text += f" OMIT EMPTY MATCHES" 2128 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2129 text += f" WITH UNMATCHED ROWS" 2130 rows = exp.var(text) 2131 else: 2132 rows = None 2133 2134 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2135 text = "AFTER MATCH SKIP" 2136 if self._match_text_seq("PAST", "LAST", "ROW"): 2137 text += f" PAST LAST ROW" 2138 elif self._match_text_seq("TO", "NEXT", "ROW"): 2139 text += f" TO NEXT ROW" 2140 elif self._match_text_seq("TO", "FIRST"): 2141 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2142 elif self._match_text_seq("TO", "LAST"): 2143 text += f" TO LAST {self._advance_any().text}" # type: ignore 2144 after = exp.var(text) 
2145 else: 2146 after = None 2147 2148 if self._match_text_seq("PATTERN"): 2149 self._match_l_paren() 2150 2151 if not self._curr: 2152 self.raise_error("Expecting )", self._curr) 2153 2154 paren = 1 2155 start = self._curr 2156 2157 while self._curr and paren > 0: 2158 if self._curr.token_type == TokenType.L_PAREN: 2159 paren += 1 2160 if self._curr.token_type == TokenType.R_PAREN: 2161 paren -= 1 2162 2163 end = self._prev 2164 self._advance() 2165 2166 if paren > 0: 2167 self.raise_error("Expecting )", self._curr) 2168 2169 pattern = exp.var(self._find_sql(start, end)) 2170 else: 2171 pattern = None 2172 2173 define = ( 2174 self._parse_csv( 2175 lambda: self.expression( 2176 exp.Alias, 2177 alias=self._parse_id_var(any_token=True), 2178 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2179 ) 2180 ) 2181 if self._match_text_seq("DEFINE") 2182 else None 2183 ) 2184 2185 self._match_r_paren() 2186 2187 return self.expression( 2188 exp.MatchRecognize, 2189 partition_by=partition, 2190 order=order, 2191 measures=measures, 2192 rows=rows, 2193 after=after, 2194 pattern=pattern, 2195 define=define, 2196 alias=self._parse_table_alias(), 2197 ) 2198 2199 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2200 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2201 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2202 2203 if outer_apply or cross_apply: 2204 this = self._parse_select(table=True) 2205 view = None 2206 outer = not cross_apply 2207 elif self._match(TokenType.LATERAL): 2208 this = self._parse_select(table=True) 2209 view = self._match(TokenType.VIEW) 2210 outer = self._match(TokenType.OUTER) 2211 else: 2212 return None 2213 2214 if not this: 2215 this = self._parse_function() or self._parse_id_var(any_token=False) 2216 while self._match(TokenType.DOT): 2217 this = exp.Dot( 2218 this=this, 2219 expression=self._parse_function() or self._parse_id_var(any_token=False), 2220 ) 2221 2222 if view: 2223 table = 
self._parse_id_var(any_token=False) 2224 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2225 table_alias: t.Optional[exp.TableAlias] = self.expression( 2226 exp.TableAlias, this=table, columns=columns 2227 ) 2228 elif isinstance(this, exp.Subquery) and this.alias: 2229 # Ensures parity between the Subquery's and the Lateral's "alias" args 2230 table_alias = this.args["alias"].copy() 2231 else: 2232 table_alias = self._parse_table_alias() 2233 2234 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2235 2236 def _parse_join_parts( 2237 self, 2238 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2239 return ( 2240 self._match_set(self.JOIN_METHODS) and self._prev, 2241 self._match_set(self.JOIN_SIDES) and self._prev, 2242 self._match_set(self.JOIN_KINDS) and self._prev, 2243 ) 2244 2245 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2246 if self._match(TokenType.COMMA): 2247 return self.expression(exp.Join, this=self._parse_table()) 2248 2249 index = self._index 2250 method, side, kind = self._parse_join_parts() 2251 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2252 join = self._match(TokenType.JOIN) 2253 2254 if not skip_join_token and not join: 2255 self._retreat(index) 2256 kind = None 2257 method = None 2258 side = None 2259 2260 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2261 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2262 2263 if not skip_join_token and not join and not outer_apply and not cross_apply: 2264 return None 2265 2266 if outer_apply: 2267 side = Token(TokenType.LEFT, "LEFT") 2268 2269 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2270 2271 if method: 2272 kwargs["method"] = method.text 2273 if side: 2274 kwargs["side"] = side.text 2275 if kind: 2276 kwargs["kind"] = kind.text 2277 if hint: 2278 kwargs["hint"] = hint 2279 2280 
if self._match(TokenType.ON): 2281 kwargs["on"] = self._parse_conjunction() 2282 elif self._match(TokenType.USING): 2283 kwargs["using"] = self._parse_wrapped_id_vars() 2284 elif not (kind and kind.token_type == TokenType.CROSS): 2285 index = self._index 2286 joins = self._parse_joins() 2287 2288 if joins and self._match(TokenType.ON): 2289 kwargs["on"] = self._parse_conjunction() 2290 elif joins and self._match(TokenType.USING): 2291 kwargs["using"] = self._parse_wrapped_id_vars() 2292 else: 2293 joins = None 2294 self._retreat(index) 2295 kwargs["this"].set("joins", joins) 2296 2297 return self.expression(exp.Join, **kwargs) 2298 2299 def _parse_index( 2300 self, 2301 index: t.Optional[exp.Expression] = None, 2302 ) -> t.Optional[exp.Index]: 2303 if index: 2304 unique = None 2305 primary = None 2306 amp = None 2307 2308 self._match(TokenType.ON) 2309 self._match(TokenType.TABLE) # hive 2310 table = self._parse_table_parts(schema=True) 2311 else: 2312 unique = self._match(TokenType.UNIQUE) 2313 primary = self._match_text_seq("PRIMARY") 2314 amp = self._match_text_seq("AMP") 2315 2316 if not self._match(TokenType.INDEX): 2317 return None 2318 2319 index = self._parse_id_var() 2320 table = None 2321 2322 using = self._parse_field() if self._match(TokenType.USING) else None 2323 2324 if self._match(TokenType.L_PAREN, advance=False): 2325 columns = self._parse_wrapped_csv(self._parse_ordered) 2326 else: 2327 columns = None 2328 2329 return self.expression( 2330 exp.Index, 2331 this=index, 2332 table=table, 2333 using=using, 2334 columns=columns, 2335 unique=unique, 2336 primary=primary, 2337 amp=amp, 2338 partition_by=self._parse_partition_by(), 2339 ) 2340 2341 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2342 hints: t.List[exp.Expression] = [] 2343 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2344 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2345 hints.append( 2346 
self.expression( 2347 exp.WithTableHint, 2348 expressions=self._parse_csv( 2349 lambda: self._parse_function() or self._parse_var(any_token=True) 2350 ), 2351 ) 2352 ) 2353 self._match_r_paren() 2354 else: 2355 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2356 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2357 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2358 2359 self._match_texts({"INDEX", "KEY"}) 2360 if self._match(TokenType.FOR): 2361 hint.set("target", self._advance_any() and self._prev.text.upper()) 2362 2363 hint.set("expressions", self._parse_wrapped_id_vars()) 2364 hints.append(hint) 2365 2366 return hints or None 2367 2368 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2369 return ( 2370 (not schema and self._parse_function(optional_parens=False)) 2371 or self._parse_id_var(any_token=False) 2372 or self._parse_string_as_identifier() 2373 or self._parse_placeholder() 2374 ) 2375 2376 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2377 catalog = None 2378 db = None 2379 table = self._parse_table_part(schema=schema) 2380 2381 while self._match(TokenType.DOT): 2382 if catalog: 2383 # This allows nesting the table in arbitrarily many dot expressions if needed 2384 table = self.expression( 2385 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2386 ) 2387 else: 2388 catalog = db 2389 db = table 2390 table = self._parse_table_part(schema=schema) 2391 2392 if not table: 2393 self.raise_error(f"Expected table name but got {self._curr}") 2394 2395 return self.expression( 2396 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2397 ) 2398 2399 def _parse_table( 2400 self, 2401 schema: bool = False, 2402 joins: bool = False, 2403 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2404 ) -> t.Optional[exp.Expression]: 2405 lateral = self._parse_lateral() 2406 if lateral: 2407 return lateral 2408 2409 unnest = self._parse_unnest() 2410 
if unnest: 2411 return unnest 2412 2413 values = self._parse_derived_table_values() 2414 if values: 2415 return values 2416 2417 subquery = self._parse_select(table=True) 2418 if subquery: 2419 if not subquery.args.get("pivots"): 2420 subquery.set("pivots", self._parse_pivots()) 2421 return subquery 2422 2423 this: exp.Expression = self._parse_table_parts(schema=schema) 2424 2425 if schema: 2426 return self._parse_schema(this=this) 2427 2428 if self.ALIAS_POST_TABLESAMPLE: 2429 table_sample = self._parse_table_sample() 2430 2431 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2432 if alias: 2433 this.set("alias", alias) 2434 2435 if not this.args.get("pivots"): 2436 this.set("pivots", self._parse_pivots()) 2437 2438 this.set("hints", self._parse_table_hints()) 2439 2440 if not self.ALIAS_POST_TABLESAMPLE: 2441 table_sample = self._parse_table_sample() 2442 2443 if table_sample: 2444 table_sample.set("this", this) 2445 this = table_sample 2446 2447 if joins: 2448 for join in iter(self._parse_join, None): 2449 this.append("joins", join) 2450 2451 return this 2452 2453 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2454 if not self._match(TokenType.UNNEST): 2455 return None 2456 2457 expressions = self._parse_wrapped_csv(self._parse_type) 2458 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2459 2460 alias = self._parse_table_alias() if with_alias else None 2461 2462 if alias and self.UNNEST_COLUMN_ONLY: 2463 if alias.args.get("columns"): 2464 self.raise_error("Unexpected extra column alias in unnest.") 2465 2466 alias.set("columns", [alias.this]) 2467 alias.set("this", None) 2468 2469 offset = None 2470 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2471 self._match(TokenType.ALIAS) 2472 offset = self._parse_id_var() or exp.to_identifier("offset") 2473 2474 return self.expression( 2475 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 
2476 ) 2477 2478 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2479 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2480 if not is_derived and not self._match(TokenType.VALUES): 2481 return None 2482 2483 expressions = self._parse_csv(self._parse_value) 2484 alias = self._parse_table_alias() 2485 2486 if is_derived: 2487 self._match_r_paren() 2488 2489 return self.expression( 2490 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2491 ) 2492 2493 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2494 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2495 as_modifier and self._match_text_seq("USING", "SAMPLE") 2496 ): 2497 return None 2498 2499 bucket_numerator = None 2500 bucket_denominator = None 2501 bucket_field = None 2502 percent = None 2503 rows = None 2504 size = None 2505 seed = None 2506 2507 kind = ( 2508 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2509 ) 2510 method = self._parse_var(tokens=(TokenType.ROW,)) 2511 2512 self._match(TokenType.L_PAREN) 2513 2514 num = self._parse_number() 2515 2516 if self._match_text_seq("BUCKET"): 2517 bucket_numerator = self._parse_number() 2518 self._match_text_seq("OUT", "OF") 2519 bucket_denominator = bucket_denominator = self._parse_number() 2520 self._match(TokenType.ON) 2521 bucket_field = self._parse_field() 2522 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2523 percent = num 2524 elif self._match(TokenType.ROWS): 2525 rows = num 2526 else: 2527 size = num 2528 2529 self._match(TokenType.R_PAREN) 2530 2531 if self._match(TokenType.L_PAREN): 2532 method = self._parse_var() 2533 seed = self._match(TokenType.COMMA) and self._parse_number() 2534 self._match_r_paren() 2535 elif self._match_texts(("SEED", "REPEATABLE")): 2536 seed = self._parse_wrapped(self._parse_number) 2537 2538 return self.expression( 2539 exp.TableSample, 2540 method=method, 2541 
bucket_numerator=bucket_numerator, 2542 bucket_denominator=bucket_denominator, 2543 bucket_field=bucket_field, 2544 percent=percent, 2545 rows=rows, 2546 size=size, 2547 seed=seed, 2548 kind=kind, 2549 ) 2550 2551 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2552 return list(iter(self._parse_pivot, None)) or None 2553 2554 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2555 return list(iter(self._parse_join, None)) or None 2556 2557 # https://duckdb.org/docs/sql/statements/pivot 2558 def _parse_simplified_pivot(self) -> exp.Pivot: 2559 def _parse_on() -> t.Optional[exp.Expression]: 2560 this = self._parse_bitwise() 2561 return self._parse_in(this) if self._match(TokenType.IN) else this 2562 2563 this = self._parse_table() 2564 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2565 using = self._match(TokenType.USING) and self._parse_csv( 2566 lambda: self._parse_alias(self._parse_function()) 2567 ) 2568 group = self._parse_group() 2569 return self.expression( 2570 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2571 ) 2572 2573 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2574 index = self._index 2575 2576 if self._match(TokenType.PIVOT): 2577 unpivot = False 2578 elif self._match(TokenType.UNPIVOT): 2579 unpivot = True 2580 else: 2581 return None 2582 2583 expressions = [] 2584 field = None 2585 2586 if not self._match(TokenType.L_PAREN): 2587 self._retreat(index) 2588 return None 2589 2590 if unpivot: 2591 expressions = self._parse_csv(self._parse_column) 2592 else: 2593 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2594 2595 if not expressions: 2596 self.raise_error("Failed to parse PIVOT's aggregation list") 2597 2598 if not self._match(TokenType.FOR): 2599 self.raise_error("Expecting FOR") 2600 2601 value = self._parse_column() 2602 2603 if not self._match(TokenType.IN): 2604 self.raise_error("Expecting IN") 2605 2606 field = self._parse_in(value, 
alias=True) 2607 2608 self._match_r_paren() 2609 2610 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2611 2612 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2613 pivot.set("alias", self._parse_table_alias()) 2614 2615 if not unpivot: 2616 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2617 2618 columns: t.List[exp.Expression] = [] 2619 for fld in pivot.args["field"].expressions: 2620 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2621 for name in names: 2622 if self.PREFIXED_PIVOT_COLUMNS: 2623 name = f"{name}_{field_name}" if name else field_name 2624 else: 2625 name = f"{field_name}_{name}" if name else field_name 2626 2627 columns.append(exp.to_identifier(name)) 2628 2629 pivot.set("columns", columns) 2630 2631 return pivot 2632 2633 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2634 return [agg.alias for agg in aggregations] 2635 2636 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2637 if not skip_where_token and not self._match(TokenType.WHERE): 2638 return None 2639 2640 return self.expression( 2641 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2642 ) 2643 2644 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2645 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2646 return None 2647 2648 elements = defaultdict(list) 2649 2650 if self._match(TokenType.ALL): 2651 return self.expression(exp.Group, all=True) 2652 2653 while True: 2654 expressions = self._parse_csv(self._parse_conjunction) 2655 if expressions: 2656 elements["expressions"].extend(expressions) 2657 2658 grouping_sets = self._parse_grouping_sets() 2659 if grouping_sets: 2660 elements["grouping_sets"].extend(grouping_sets) 2661 2662 rollup = None 2663 cube = None 2664 totals = None 2665 2666 with_ = 
self._match(TokenType.WITH) 2667 if self._match(TokenType.ROLLUP): 2668 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2669 elements["rollup"].extend(ensure_list(rollup)) 2670 2671 if self._match(TokenType.CUBE): 2672 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2673 elements["cube"].extend(ensure_list(cube)) 2674 2675 if self._match_text_seq("TOTALS"): 2676 totals = True 2677 elements["totals"] = True # type: ignore 2678 2679 if not (grouping_sets or rollup or cube or totals): 2680 break 2681 2682 return self.expression(exp.Group, **elements) # type: ignore 2683 2684 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2685 if not self._match(TokenType.GROUPING_SETS): 2686 return None 2687 2688 return self._parse_wrapped_csv(self._parse_grouping_set) 2689 2690 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2691 if self._match(TokenType.L_PAREN): 2692 grouping_set = self._parse_csv(self._parse_column) 2693 self._match_r_paren() 2694 return self.expression(exp.Tuple, expressions=grouping_set) 2695 2696 return self._parse_column() 2697 2698 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2699 if not skip_having_token and not self._match(TokenType.HAVING): 2700 return None 2701 return self.expression(exp.Having, this=self._parse_conjunction()) 2702 2703 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2704 if not self._match(TokenType.QUALIFY): 2705 return None 2706 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2707 2708 def _parse_order( 2709 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2710 ) -> t.Optional[exp.Expression]: 2711 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2712 return this 2713 2714 return self.expression( 2715 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2716 ) 2717 2718 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> 
t.Optional[E]: 2719 if not self._match(token): 2720 return None 2721 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2722 2723 def _parse_ordered(self) -> exp.Ordered: 2724 this = self._parse_conjunction() 2725 self._match(TokenType.ASC) 2726 2727 is_desc = self._match(TokenType.DESC) 2728 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2729 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2730 desc = is_desc or False 2731 asc = not desc 2732 nulls_first = is_nulls_first or False 2733 explicitly_null_ordered = is_nulls_first or is_nulls_last 2734 2735 if ( 2736 not explicitly_null_ordered 2737 and ( 2738 (asc and self.NULL_ORDERING == "nulls_are_small") 2739 or (desc and self.NULL_ORDERING != "nulls_are_small") 2740 ) 2741 and self.NULL_ORDERING != "nulls_are_last" 2742 ): 2743 nulls_first = True 2744 2745 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2746 2747 def _parse_limit( 2748 self, this: t.Optional[exp.Expression] = None, top: bool = False 2749 ) -> t.Optional[exp.Expression]: 2750 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2751 limit_paren = self._match(TokenType.L_PAREN) 2752 expression = self._parse_number() if top else self._parse_term() 2753 2754 if self._match(TokenType.COMMA): 2755 offset = expression 2756 expression = self._parse_term() 2757 else: 2758 offset = None 2759 2760 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2761 2762 if limit_paren: 2763 self._match_r_paren() 2764 2765 return limit_exp 2766 2767 if self._match(TokenType.FETCH): 2768 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2769 direction = self._prev.text if direction else "FIRST" 2770 2771 count = self._parse_number() 2772 percent = self._match(TokenType.PERCENT) 2773 2774 self._match_set((TokenType.ROW, TokenType.ROWS)) 2775 2776 only = self._match_text_seq("ONLY") 2777 with_ties = self._match_text_seq("WITH", "TIES") 
2778 2779 if only and with_ties: 2780 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2781 2782 return self.expression( 2783 exp.Fetch, 2784 direction=direction, 2785 count=count, 2786 percent=percent, 2787 with_ties=with_ties, 2788 ) 2789 2790 return this 2791 2792 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2793 if not self._match(TokenType.OFFSET): 2794 return this 2795 2796 count = self._parse_number() 2797 self._match_set((TokenType.ROW, TokenType.ROWS)) 2798 return self.expression(exp.Offset, this=this, expression=count) 2799 2800 def _parse_locks(self) -> t.List[exp.Lock]: 2801 locks = [] 2802 while True: 2803 if self._match_text_seq("FOR", "UPDATE"): 2804 update = True 2805 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2806 "LOCK", "IN", "SHARE", "MODE" 2807 ): 2808 update = False 2809 else: 2810 break 2811 2812 expressions = None 2813 if self._match_text_seq("OF"): 2814 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2815 2816 wait: t.Optional[bool | exp.Expression] = None 2817 if self._match_text_seq("NOWAIT"): 2818 wait = True 2819 elif self._match_text_seq("WAIT"): 2820 wait = self._parse_primary() 2821 elif self._match_text_seq("SKIP", "LOCKED"): 2822 wait = False 2823 2824 locks.append( 2825 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2826 ) 2827 2828 return locks 2829 2830 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2831 if not self._match_set(self.SET_OPERATIONS): 2832 return this 2833 2834 token_type = self._prev.token_type 2835 2836 if token_type == TokenType.UNION: 2837 expression = exp.Union 2838 elif token_type == TokenType.EXCEPT: 2839 expression = exp.Except 2840 else: 2841 expression = exp.Intersect 2842 2843 return self.expression( 2844 expression, 2845 this=this, 2846 distinct=self._match(TokenType.DISTINCT) or not 
            self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a single, optionally aliased, scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse binary operators listed in self.CONJUNCTION; next level down
        in the precedence chain is equality."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse binary operators listed in self.EQUALITY over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse binary operators listed in self.COMPARISON over range
        predicates."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (dispatched through
        self.RANGE_PARSERS), plus ISNULL/NOTNULL and IS tests, layered on top
        of a bitwise expression."""
        this = self._parse_bitwise()
        # An optional leading NOT negates the whole range predicate parsed below.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dedicated range parser declined; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
2876 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2877 if self._match(TokenType.NOTNULL): 2878 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2879 this = self.expression(exp.Not, this=this) 2880 2881 if negate: 2882 this = self.expression(exp.Not, this=this) 2883 2884 if self._match(TokenType.IS): 2885 this = self._parse_is(this) 2886 2887 return this 2888 2889 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2890 index = self._index - 1 2891 negate = self._match(TokenType.NOT) 2892 2893 if self._match_text_seq("DISTINCT", "FROM"): 2894 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2895 return self.expression(klass, this=this, expression=self._parse_expression()) 2896 2897 expression = self._parse_null() or self._parse_boolean() 2898 if not expression: 2899 self._retreat(index) 2900 return None 2901 2902 this = self.expression(exp.Is, this=this, expression=expression) 2903 return self.expression(exp.Not, this=this) if negate else this 2904 2905 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2906 unnest = self._parse_unnest(with_alias=False) 2907 if unnest: 2908 this = self.expression(exp.In, this=this, unnest=unnest) 2909 elif self._match(TokenType.L_PAREN): 2910 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2911 2912 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2913 this = self.expression(exp.In, this=this, query=expressions[0]) 2914 else: 2915 this = self.expression(exp.In, this=this, expressions=expressions) 2916 2917 self._match_r_paren(this) 2918 else: 2919 this = self.expression(exp.In, this=this, field=self._parse_field()) 2920 2921 return this 2922 2923 def _parse_between(self, this: exp.Expression) -> exp.Between: 2924 low = self._parse_bitwise() 2925 self._match(TokenType.AND) 2926 high = self._parse_bitwise() 2927 return self.expression(exp.Between, this=this, low=low, 
high=high) 2928 2929 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2930 if not self._match(TokenType.ESCAPE): 2931 return this 2932 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2933 2934 def _parse_interval(self) -> t.Optional[exp.Interval]: 2935 if not self._match(TokenType.INTERVAL): 2936 return None 2937 2938 if self._match(TokenType.STRING, advance=False): 2939 this = self._parse_primary() 2940 else: 2941 this = self._parse_term() 2942 2943 unit = self._parse_function() or self._parse_var() 2944 2945 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2946 # each INTERVAL expression into this canonical form so it's easy to transpile 2947 if this and this.is_number: 2948 this = exp.Literal.string(this.name) 2949 elif this and this.is_string: 2950 parts = this.name.split() 2951 2952 if len(parts) == 2: 2953 if unit: 2954 # this is not actually a unit, it's something else 2955 unit = None 2956 self._retreat(self._index - 1) 2957 else: 2958 this = exp.Literal.string(parts[0]) 2959 unit = self.expression(exp.Var, this=parts[1]) 2960 2961 return self.expression(exp.Interval, this=this, unit=unit) 2962 2963 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2964 this = self._parse_term() 2965 2966 while True: 2967 if self._match_set(self.BITWISE): 2968 this = self.expression( 2969 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2970 ) 2971 elif self._match_pair(TokenType.LT, TokenType.LT): 2972 this = self.expression( 2973 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2974 ) 2975 elif self._match_pair(TokenType.GT, TokenType.GT): 2976 this = self.expression( 2977 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2978 ) 2979 else: 2980 break 2981 2982 return this 2983 2984 def _parse_term(self) -> t.Optional[exp.Expression]: 2985 return self._parse_tokens(self._parse_factor, self.TERM) 2986 2987 def 
_parse_factor(self) -> t.Optional[exp.Expression]: 2988 return self._parse_tokens(self._parse_unary, self.FACTOR) 2989 2990 def _parse_unary(self) -> t.Optional[exp.Expression]: 2991 if self._match_set(self.UNARY_PARSERS): 2992 return self.UNARY_PARSERS[self._prev.token_type](self) 2993 return self._parse_at_time_zone(self._parse_type()) 2994 2995 def _parse_type(self) -> t.Optional[exp.Expression]: 2996 interval = self._parse_interval() 2997 if interval: 2998 return interval 2999 3000 index = self._index 3001 data_type = self._parse_types(check_func=True) 3002 this = self._parse_column() 3003 3004 if data_type: 3005 if isinstance(this, exp.Literal): 3006 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3007 if parser: 3008 return parser(self, this, data_type) 3009 return self.expression(exp.Cast, this=this, to=data_type) 3010 if not data_type.expressions: 3011 self._retreat(index) 3012 return self._parse_column() 3013 return self._parse_column_ops(data_type) 3014 3015 return this 3016 3017 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3018 this = self._parse_type() 3019 if not this: 3020 return None 3021 3022 return self.expression( 3023 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3024 ) 3025 3026 def _parse_types( 3027 self, check_func: bool = False, schema: bool = False 3028 ) -> t.Optional[exp.Expression]: 3029 index = self._index 3030 3031 prefix = self._match_text_seq("SYSUDTLIB", ".") 3032 3033 if not self._match_set(self.TYPE_TOKENS): 3034 return None 3035 3036 type_token = self._prev.token_type 3037 3038 if type_token == TokenType.PSEUDO_TYPE: 3039 return self.expression(exp.PseudoType, this=self._prev.text) 3040 3041 nested = type_token in self.NESTED_TYPE_TOKENS 3042 is_struct = type_token == TokenType.STRUCT 3043 expressions = None 3044 maybe_func = False 3045 3046 if self._match(TokenType.L_PAREN): 3047 if is_struct: 3048 expressions = self._parse_csv(self._parse_struct_types) 3049 elif nested: 3050 
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameter list -- back out entirely.
                self._retreat(index)
                return None

            maybe_func = True

        # Postgres-style array types: INT[] / INT[][] ...
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone [ means this was a bracket expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting: ARRAY<INT>, MAP<TEXT, INT>, STRUCT<a INT, ...>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a function call: a following string literal
            # means it was a function, so this is not a type after all.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators: brackets, :: casts, dots, JSON extraction, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Each extra dot shifts the qualifiers: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse literals, .5-style numbers, and parenthesized expressions/subqueries."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals implicitly concatenate: 'a' 'b' -> CONCAT.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .25
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (paren-less forms included) or return None."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(SELECT ...) and friends.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS
            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function -- build the typed node and validate its arg list.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducers: _utf8'text'. Falls back to an identifier
        # when no literal follows.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr), a DISTINCT argument list, or a plain expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda -- rewind and parse as a regular (possibly DISTINCT) argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list; bails if a subquery follows instead."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Whether or not it parsed as a SELECT, reset state -- the probe is
                # purely a lookahead.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS [IDENTITY] (...) column constraints."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expression>) -- a computed column.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options (ON DELETE/UPDATE actions etc.) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): expressions is never populated here, so a REFERENCES
        # tbl(col, ...) column list is not captured -- confirm this is intended.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
expressions = self._parse_wrapped_id_vars() 3592 reference = self._parse_references() 3593 options = {} 3594 3595 while self._match(TokenType.ON): 3596 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3597 self.raise_error("Expected DELETE or UPDATE") 3598 3599 kind = self._prev.text.lower() 3600 3601 if self._match_text_seq("NO", "ACTION"): 3602 action = "NO ACTION" 3603 elif self._match(TokenType.SET): 3604 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3605 action = "SET " + self._prev.text.upper() 3606 else: 3607 self._advance() 3608 action = self._prev.text.upper() 3609 3610 options[kind] = action 3611 3612 return self.expression( 3613 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3614 ) 3615 3616 def _parse_primary_key( 3617 self, wrapped_optional: bool = False, in_props: bool = False 3618 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3619 desc = ( 3620 self._match_set((TokenType.ASC, TokenType.DESC)) 3621 and self._prev.token_type == TokenType.DESC 3622 ) 3623 3624 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3625 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3626 3627 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3628 options = self._parse_key_constraint_options() 3629 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3630 3631 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3632 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3633 return this 3634 3635 bracket_kind = self._prev.token_type 3636 3637 if self._match(TokenType.COLON): 3638 expressions: t.List[t.Optional[exp.Expression]] = [ 3639 self.expression(exp.Slice, expression=self._parse_conjunction()) 3640 ] 3641 else: 3642 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3643 3644 # 
https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3645 if bracket_kind == TokenType.L_BRACE: 3646 this = self.expression(exp.Struct, expressions=expressions) 3647 elif not this or this.name.upper() == "ARRAY": 3648 this = self.expression(exp.Array, expressions=expressions) 3649 else: 3650 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3651 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3652 3653 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3654 self.raise_error("Expected ]") 3655 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3656 self.raise_error("Expected }") 3657 3658 self._add_comments(this) 3659 return self._parse_bracket(this) 3660 3661 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3662 if self._match(TokenType.COLON): 3663 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3664 return this 3665 3666 def _parse_case(self) -> t.Optional[exp.Expression]: 3667 ifs = [] 3668 default = None 3669 3670 expression = self._parse_conjunction() 3671 3672 while self._match(TokenType.WHEN): 3673 this = self._parse_conjunction() 3674 self._match(TokenType.THEN) 3675 then = self._parse_conjunction() 3676 ifs.append(self.expression(exp.If, this=this, true=then)) 3677 3678 if self._match(TokenType.ELSE): 3679 default = self._parse_conjunction() 3680 3681 if not self._match(TokenType.END): 3682 self.raise_error("Expected END after CASE", self._prev) 3683 3684 return self._parse_window( 3685 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3686 ) 3687 3688 def _parse_if(self) -> t.Optional[exp.Expression]: 3689 if self._match(TokenType.L_PAREN): 3690 args = self._parse_csv(self._parse_conjunction) 3691 this = self.validate_expression(exp.If.from_arg_list(args), args) 3692 self._match_r_paren() 3693 else: 3694 index = self._index - 1 3695 condition = 
self._parse_conjunction() 3696 3697 if not condition: 3698 self._retreat(index) 3699 return None 3700 3701 self._match(TokenType.THEN) 3702 true = self._parse_conjunction() 3703 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3704 self._match(TokenType.END) 3705 this = self.expression(exp.If, this=condition, true=true, false=false) 3706 3707 return self._parse_window(this) 3708 3709 def _parse_extract(self) -> exp.Extract: 3710 this = self._parse_function() or self._parse_var() or self._parse_type() 3711 3712 if self._match(TokenType.FROM): 3713 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3714 3715 if not self._match(TokenType.COMMA): 3716 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3717 3718 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3719 3720 def _parse_any_value(self) -> exp.AnyValue: 3721 this = self._parse_lambda() 3722 is_max = None 3723 having = None 3724 3725 if self._match(TokenType.HAVING): 3726 self._match_texts(("MAX", "MIN")) 3727 is_max = self._prev.text == "MAX" 3728 having = self._parse_column() 3729 3730 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3731 3732 def _parse_cast(self, strict: bool) -> exp.Expression: 3733 this = self._parse_conjunction() 3734 3735 if not self._match(TokenType.ALIAS): 3736 if self._match(TokenType.COMMA): 3737 return self.expression( 3738 exp.CastToStrType, this=this, expression=self._parse_string() 3739 ) 3740 else: 3741 self.raise_error("Expected AS after CAST") 3742 3743 fmt = None 3744 to = self._parse_types() 3745 3746 if not to: 3747 self.raise_error("Expected TYPE after CAST") 3748 elif to.this == exp.DataType.Type.CHAR: 3749 if self._match(TokenType.CHARACTER_SET): 3750 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3751 elif self._match(TokenType.FORMAT): 3752 fmt_string = self._parse_string() 3753 fmt = 
self._parse_at_time_zone(fmt_string) 3754 3755 if to.this in exp.DataType.TEMPORAL_TYPES: 3756 this = self.expression( 3757 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3758 this=this, 3759 format=exp.Literal.string( 3760 format_time( 3761 fmt_string.this if fmt_string else "", 3762 self.FORMAT_MAPPING or self.TIME_MAPPING, 3763 self.FORMAT_TRIE or self.TIME_TRIE, 3764 ) 3765 ), 3766 ) 3767 3768 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3769 this.set("zone", fmt.args["zone"]) 3770 3771 return this 3772 3773 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3774 3775 def _parse_concat(self) -> t.Optional[exp.Expression]: 3776 args = self._parse_csv(self._parse_conjunction) 3777 if self.CONCAT_NULL_OUTPUTS_STRING: 3778 args = [ 3779 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3780 for arg in args 3781 if arg 3782 ] 3783 3784 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3785 # we find such a call we replace it with its argument. 
        # A single-argument CONCAT is redundant, so unwrap it (some dialects,
        # e.g. Trino, reject the single-argument form outright).
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse the argument list of STRING_AGG / GROUP_CONCAT into an exp.GroupConcat.

        Handles the Postgres form with a trailing ORDER BY inside the call, as well as
        the WITHIN GROUP (ORDER BY ...) form.
        """
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse the argument list of CONVERT(expr USING charset | expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        # `strict` selects Cast vs TryCast, mirroring _parse_cast.
        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two (or fewer) args: the charset variant.
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never equals NULL, so an explicit IS NULL check is required.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match either by equality or when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A leftover trailing argument (odd count) is the CASE default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] key [:|VALUE] value` pair of a JSON_OBJECT call."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the argument list of JSON_OBJECT, including its optional modifiers
        (NULL/ABSENT ON NULL, WITH/WITHOUT UNIQUE KEYS, RETURNING, FORMAT JSON, ENCODING)."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...) arguments, normalizing the dialect-dependent argument order
        and the single-argument default (LN vs LOG)."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST (expr [modifier]) full-text predicate."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type [path] [AS JSON]` entry of the WITH clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments; `haystack_first` flips the comma form's
        argument order for dialects that pass the haystack first."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(needle IN haystack).
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint pseudo-function (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): first operand was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT-level `WINDOW name AS (...)` clause, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the corresponding keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...), or a named-window definition when `alias`."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parenthesized spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary (UNBOUNDED / CURRENT ROW / expr, plus PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a trailing alias for `this`; when `explicit`, only after an AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token into an exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and treat it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a variable or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; backtracks one token if its parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star-modifier EXCEPT (col, ...) / EXCEPT col, ... list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star-modifier REPLACE (expr, ...) / REPLACE expr, ... list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments sitting on the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators from `expressions` over `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; the parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a SELECT statement or a plain (set-operation) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] TRANSACTION|WORK with optional mode words."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode may consist of several VAR tokens (e.g. ISOLATION LEVEL ...).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse one DROP [COLUMN] action, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a DROP PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse column additions instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] col {DROP|SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse column drops instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; anything unrecognized degrades to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW_PARSERS trie, or a bare exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` SET item; backtracks on failure."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via the dialect's SET_PARSERS trie, else as assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; leftover tokens degrade it to a raw exp.Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the (possibly multi-word) `options` and return it as an exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remaining tokens and wrap the raw SQL from `start` in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the rest of the statement text.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: NAME(KIND(key value ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE(MIN x MAX y) property; MIN defaults to 0 if absent."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens and return the matching parser from `parsers`.

        Restores the token position when no full match is found.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (consuming the token unless advance=False) if the current token
        is `token_type`; attaches pending comments to `expression` when given."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (consuming the token unless advance=False) if the current token
        type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (consuming both tokens unless advance=False) if the next two
        tokens are exactly `token_type_a`, `token_type_b`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required '(' or raise a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ')' or raise a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True (consuming the token unless advance=False) if the current token's
        uppercased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Return True if the next tokens spell out `texts` (case-insensitive); the
        position is restored on failure, or when advance=False."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes into Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters inside `node` with plain
        identifiers (or Dot chains), so they aren't treated as table columns.

        Returns the (possibly replaced) root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the chain containing the column.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot chain: replace the column itself (or the root).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap or VarMap expression from a flat argument list.

    A single star argument yields ``StarMap``. Otherwise the arguments are
    treated as an alternating ``key, value, key, value, ...`` sequence and
    gathered into two parallel ``Array`` expressions inside a ``VarMap``.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: t.List = []
    values: t.List = []

    # Walk the list two items at a time: even index = key, odd index = value.
    position = 0
    while position < len(args):
        keys.append(args[position])
        values.append(args[position + 1])
        position += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: 
lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 
TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 
TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: 
self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, 
this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 742 TokenType.WHERE: lambda self: ("where", self._parse_where()), 743 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 744 TokenType.HAVING: 
lambda self: ("having", self._parse_having()), 745 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 746 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 747 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 748 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 749 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 750 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 751 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 752 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 753 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.CLUSTER_BY: lambda self: ( 756 "cluster", 757 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 ), 759 TokenType.DISTRIBUTE_BY: lambda self: ( 760 "distribute", 761 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 762 ), 763 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 764 } 765 766 SET_PARSERS = { 767 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 768 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 769 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 770 "TRANSACTION": lambda self: self._parse_set_transaction(), 771 } 772 773 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 774 775 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 776 777 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 778 779 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 780 781 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 782 783 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 784 TRANSACTION_CHARACTERISTICS = { 785 "ISOLATION LEVEL REPEATABLE READ", 786 "ISOLATION LEVEL READ COMMITTED", 787 "ISOLATION LEVEL READ 
UNCOMMITTED", 788 "ISOLATION LEVEL SERIALIZABLE", 789 "READ WRITE", 790 "READ ONLY", 791 } 792 793 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 794 795 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 796 797 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 798 799 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 800 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 801 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 802 803 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 804 805 STRICT_CAST = True 806 807 # A NULL arg in CONCAT yields NULL by default 808 CONCAT_NULL_OUTPUTS_STRING = False 809 810 PREFIXED_PIVOT_COLUMNS = False 811 IDENTIFY_PIVOT_STRINGS = False 812 813 LOG_BASE_FIRST = True 814 LOG_DEFAULTS_TO_LN = False 815 816 __slots__ = ( 817 "error_level", 818 "error_message_context", 819 "max_errors", 820 "sql", 821 "errors", 822 "_tokens", 823 "_index", 824 "_curr", 825 "_next", 826 "_prev", 827 "_prev_comments", 828 ) 829 830 # Autofilled 831 INDEX_OFFSET: int = 0 832 UNNEST_COLUMN_ONLY: bool = False 833 ALIAS_POST_TABLESAMPLE: bool = False 834 STRICT_STRING_CONCAT = False 835 NULL_ORDERING: str = "nulls_are_small" 836 SHOW_TRIE: t.Dict = {} 837 SET_TRIE: t.Dict = {} 838 FORMAT_MAPPING: t.Dict[str, str] = {} 839 FORMAT_TRIE: t.Dict = {} 840 TIME_MAPPING: t.Dict[str, str] = {} 841 TIME_TRIE: t.Dict = {} 842 843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset() 853 854 def reset(self): 855 self.sql = "" 856 self.errors = [] 857 self._tokens = [] 858 self._index = 0 859 self._curr = None 860 self._next = None 861 self._prev = None 862 self._prev_comments = None 863 864 def parse( 865 self, raw_tokens: 
t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 ) 881 882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 
900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1] 917 918 def _parse( 919 self, 920 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 921 raw_tokens: t.List[Token], 922 sql: t.Optional[str] = None, 923 ) -> t.List[t.Optional[exp.Expression]]: 924 self.reset() 925 self.sql = sql or "" 926 927 total = len(raw_tokens) 928 chunks: t.List[t.List[Token]] = [[]] 929 930 for i, token in enumerate(raw_tokens): 931 if token.token_type == TokenType.SEMICOLON: 932 if i < total - 1: 933 chunks.append([]) 934 else: 935 chunks[-1].append(token) 936 937 expressions = [] 938 939 for tokens in chunks: 940 self._index = -1 941 self._tokens = tokens 942 self._advance() 943 944 expressions.append(parse_method(self)) 945 946 if self._index < len(self._tokens): 947 self.raise_error("Invalid expression / Unexpected token") 948 949 self.check_errors() 950 951 return expressions 952 953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 ) 963 964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 
968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error) 991 992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 
    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor forward `times` tokens, refreshing the current/next/prev
        # token caches and the pending comments of the previous token.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) the cursor to an absolute index, used for backtracking.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token plus the remaining text as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses `COMMENT [IF EXISTS] ON <kind> <target> IS <string>`.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: degrade gracefully to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses a (possibly qualified) table name into a TO TABLE property.
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        # Parses ClickHouse MergeTree TTL clauses, e.g. `TTL d + INTERVAL 1 DAY DELETE`.
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # Bare expression TTL with no explicit action.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Top-level statement dispatcher: registered statement parsers first, then
        # opaque commands, then a bare expression or SELECT with modifiers.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        # Parses `DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...`.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown droppable kind: degrade gracefully to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches `IF [NOT] EXISTS`; truthy only when the full sequence is present.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Parses CREATE statements for all creatable kinds (tables, views, UDFs,
        # indexes, ...), accumulating properties from their various legal positions.
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # `CREATE TABLE FUNCTION ...`: the TABLE token is part of the kind, skip it.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges properties parsed at a later position into the running set.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        # Snowflake-style `CREATE ... CLONE src [AT|BEFORE (<kind> => <expr>)]`.
        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept one of the matched modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Parses one table/view property; returns None when no property is found.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment (key may be an identifier or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses `STORED [AS] <format>` including Hive's INPUTFORMAT/OUTPUTFORMAT pair.
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Parses `[= | AS] <field>` into the given single-argument property class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Parses consecutive properties until none match; `before` selects the
        # Teradata-style pre-name property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield multiple properties.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguates Teradata's VOLATILE table modifier (follows CREATE/TEMPORARY/...)
        # from the function stability attribute VOLATILE.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        # Parses the various WITH-prefixed property forms.
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # Parses `CHECKSUM = ON|OFF|DEFAULT`; `on` stays None when neither ON nor OFF matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Parses Hive's `CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS`.
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # `COPY` alone isn't a property — backtrack unless followed by GRANTS.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # Parses `[MIN|MAX|DEFAULT] DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]]`.
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # Parses `BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]`.
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # Parses Teradata's `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`.
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Parses Teradata LOCKING modifiers: target kind, FOR/IN, lock type, OVERRIDE.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks don't name a target; the others are followed by an object name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns the PARTITION BY expressions, or an empty list when absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # `statistics` is None when no AND [NO] STATISTICS clause is present.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # Parses `ON COMMIT PRESERVE|DELETE ROWS` for temporary tables.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # Parses `LIKE <table> [INCLUDING|EXCLUDING <option>]...`.
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # Parses a UDF RETURNS clause: a scalar type, TABLE<...>, or TABLE (schema).
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        # Parses `DESCRIBE [<kind>] <table>`.
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)
    def _parse_insert(self) -> exp.Insert:
        # Parses INSERT, including Hive's `INSERT OVERWRITE [LOCAL] DIRECTORY` form.
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. SQLite's `INSERT OR REPLACE/IGNORE/...`.
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Parses Postgres `ON CONFLICT ...` and MySQL `ON DUPLICATE KEY ...` clauses.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed by the caller; require FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Parses Hive `ROW FORMAT SERDE ...` or `ROW FORMAT DELIMITED ...`.
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Parses Hive `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        # anything else becomes an opaque command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # Parses Spark's `CACHE [LAZY] TABLE t [OPTIONS ('k' = 'v')] [AS] <select>`.
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # Parses one VALUES row; a parenthesized tuple or a single bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        # Parses a SELECT-like query: WITH-prefixed statements, plain SELECTs,
        # parenthesized subqueries (when nested/table), and VALUES clauses.
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery's `SELECT AS STRUCT|VALUE`.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        # Parses a WITH clause and its comma-separated CTEs.
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # Parses `alias [AS] (<statement>)`.
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # Parses `[AS] alias [(col, ...)]`; backtracks if the parens weren't columns.
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Attaches trailing joins, laterals, and other query modifiers to `this`.
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
this.append("laterals", lateral) 2060 2061 while True: 2062 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2063 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2064 key, expression = parser(self) 2065 2066 if expression: 2067 this.set(key, expression) 2068 if key == "limit": 2069 offset = expression.args.pop("offset", None) 2070 if offset: 2071 this.set("offset", exp.Offset(expression=offset)) 2072 continue 2073 break 2074 return this 2075 2076 def _parse_hint(self) -> t.Optional[exp.Hint]: 2077 if self._match(TokenType.HINT): 2078 hints = [] 2079 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2080 hints.extend(hint) 2081 2082 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2083 self.raise_error("Expected */ after HINT") 2084 2085 return self.expression(exp.Hint, expressions=hints) 2086 2087 return None 2088 2089 def _parse_into(self) -> t.Optional[exp.Into]: 2090 if not self._match(TokenType.INTO): 2091 return None 2092 2093 temp = self._match(TokenType.TEMPORARY) 2094 unlogged = self._match_text_seq("UNLOGGED") 2095 self._match(TokenType.TABLE) 2096 2097 return self.expression( 2098 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2099 ) 2100 2101 def _parse_from( 2102 self, joins: bool = False, skip_from_token: bool = False 2103 ) -> t.Optional[exp.From]: 2104 if not skip_from_token and not self._match(TokenType.FROM): 2105 return None 2106 2107 return self.expression( 2108 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2109 ) 2110 2111 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2112 if not self._match(TokenType.MATCH_RECOGNIZE): 2113 return None 2114 2115 self._match_l_paren() 2116 2117 partition = self._parse_partition_by() 2118 order = self._parse_order() 2119 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2120 2121 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2122 rows 
= exp.var("ONE ROW PER MATCH") 2123 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2124 text = "ALL ROWS PER MATCH" 2125 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2126 text += f" SHOW EMPTY MATCHES" 2127 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2128 text += f" OMIT EMPTY MATCHES" 2129 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2130 text += f" WITH UNMATCHED ROWS" 2131 rows = exp.var(text) 2132 else: 2133 rows = None 2134 2135 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2136 text = "AFTER MATCH SKIP" 2137 if self._match_text_seq("PAST", "LAST", "ROW"): 2138 text += f" PAST LAST ROW" 2139 elif self._match_text_seq("TO", "NEXT", "ROW"): 2140 text += f" TO NEXT ROW" 2141 elif self._match_text_seq("TO", "FIRST"): 2142 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2143 elif self._match_text_seq("TO", "LAST"): 2144 text += f" TO LAST {self._advance_any().text}" # type: ignore 2145 after = exp.var(text) 2146 else: 2147 after = None 2148 2149 if self._match_text_seq("PATTERN"): 2150 self._match_l_paren() 2151 2152 if not self._curr: 2153 self.raise_error("Expecting )", self._curr) 2154 2155 paren = 1 2156 start = self._curr 2157 2158 while self._curr and paren > 0: 2159 if self._curr.token_type == TokenType.L_PAREN: 2160 paren += 1 2161 if self._curr.token_type == TokenType.R_PAREN: 2162 paren -= 1 2163 2164 end = self._prev 2165 self._advance() 2166 2167 if paren > 0: 2168 self.raise_error("Expecting )", self._curr) 2169 2170 pattern = exp.var(self._find_sql(start, end)) 2171 else: 2172 pattern = None 2173 2174 define = ( 2175 self._parse_csv( 2176 lambda: self.expression( 2177 exp.Alias, 2178 alias=self._parse_id_var(any_token=True), 2179 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2180 ) 2181 ) 2182 if self._match_text_seq("DEFINE") 2183 else None 2184 ) 2185 2186 self._match_r_paren() 2187 2188 return self.expression( 2189 exp.MatchRecognize, 2190 partition_by=partition, 2191 
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        # LATERAL, CROSS APPLY and OUTER APPLY all produce an exp.Lateral node.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW (Hive-style): alias table and columns explicitly.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns the (method, side, kind) tokens of a join, each possibly None.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        # A bare comma in a FROM list is an implicit join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed: rewind and drop whatever join parts were consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-sided join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Nested joins: try to attach trailing joins to the joined table,
            # but only if an ON/USING follows; otherwise rewind.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)
            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        # When `index` is given, parse the `ON <table> (...)` half of a CREATE INDEX;
        # otherwise parse the `[UNIQUE|PRIMARY|AMP] INDEX <name>` half.
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        # Parses either T-SQL WITH (...) table hints or MySQL index hints.
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One dotted component of a table name; functions are disallowed in schema context.
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        # Parses up to catalog.db.table; deeper dots become nested exp.Dot nodes.
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # A "table" factor may be a lateral, unnest, VALUES, subquery or a plain
        # table reference; the first alternative that parses wins.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag controls whether TABLESAMPLE appears before or after the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        # Handles both `(VALUES ...) alias` and a bare `VALUES ...`.
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
) 2511 method = self._parse_var(tokens=(TokenType.ROW,)) 2512 2513 self._match(TokenType.L_PAREN) 2514 2515 num = self._parse_number() 2516 2517 if self._match_text_seq("BUCKET"): 2518 bucket_numerator = self._parse_number() 2519 self._match_text_seq("OUT", "OF") 2520 bucket_denominator = bucket_denominator = self._parse_number() 2521 self._match(TokenType.ON) 2522 bucket_field = self._parse_field() 2523 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2524 percent = num 2525 elif self._match(TokenType.ROWS): 2526 rows = num 2527 else: 2528 size = num 2529 2530 self._match(TokenType.R_PAREN) 2531 2532 if self._match(TokenType.L_PAREN): 2533 method = self._parse_var() 2534 seed = self._match(TokenType.COMMA) and self._parse_number() 2535 self._match_r_paren() 2536 elif self._match_texts(("SEED", "REPEATABLE")): 2537 seed = self._parse_wrapped(self._parse_number) 2538 2539 return self.expression( 2540 exp.TableSample, 2541 method=method, 2542 bucket_numerator=bucket_numerator, 2543 bucket_denominator=bucket_denominator, 2544 bucket_field=bucket_field, 2545 percent=percent, 2546 rows=rows, 2547 size=size, 2548 seed=seed, 2549 kind=kind, 2550 ) 2551 2552 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2553 return list(iter(self._parse_pivot, None)) or None 2554 2555 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2556 return list(iter(self._parse_join, None)) or None 2557 2558 # https://duckdb.org/docs/sql/statements/pivot 2559 def _parse_simplified_pivot(self) -> exp.Pivot: 2560 def _parse_on() -> t.Optional[exp.Expression]: 2561 this = self._parse_bitwise() 2562 return self._parse_in(this) if self._match(TokenType.IN) else this 2563 2564 this = self._parse_table() 2565 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2566 using = self._match(TokenType.USING) and self._parse_csv( 2567 lambda: self._parse_alias(self._parse_function()) 2568 ) 2569 group = self._parse_group() 2570 return self.expression( 2571 exp.Pivot, 
            this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        # Parse a standard PIVOT/UNPIVOT(...) clause; returns None (after
        # rewinding) when the keyword is not followed by "(".
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names a PIVOT produces
            # (aggregation alias combined with each IN-list value).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default naming strategy; dialects may override.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates expressions / grouping sets / rollup / cube / totals
        # across repeated passes until none of them advances the parser.
        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE carry no column list; the bare forms do.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or one column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        # Shared parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses.
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query doesn't pin NULL ordering, derive it from the
        # dialect's NULL_ORDERING setting so transpilation stays faithful.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None,
        top: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parses LIMIT (or TOP when `top=True`) and FETCH FIRST/NEXT clauses.
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        # Parses trailing FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE clauses,
        # each optionally with OF <tables> and NOWAIT / WAIT n / SKIP LOCKED.
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    # Expression-precedence chain: each level delegates to the next-tighter one.
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # `index` points at the IS token so we can rewind if nothing valid follows.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        # IN may take an UNNEST, a parenthesized subquery/expression list, or a bare field.
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            # "<<" and ">>" arrive as two separate comparison tokens.
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        # Try `<type> <literal>` (e.g. DATE '2020-01-01') as an implicit cast;
        # rewind if what followed wasn't a literal.
        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        # Parse a data type, including nested types (ARRAY<...>, STRUCT<...>),
        # parenthesized sizes, bracketed array suffixes and timestamp modifiers.
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A type name followed by "(...)" could also be a function call.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `int[]`, `int[][]`, ... become nested ARRAY data types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate: if a string literal follows, treat it as a cast;
            # otherwise this was a function call, so rewind entirely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        # One `name: type` (or bare type) entry inside STRUCT<...>.
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Apply trailing column operators (casts via "::", dots, etc.) in a loop.
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            # NOTE(review): continues the operator loop of _parse_column_ops;
            # the loop header is in the preceding chunk.
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dotted part shifts column/table/db up one level to
                # table/db/catalog, with `field` becoming the column.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        # Literals, parenthesized expressions, tuples and subqueries.
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate into one Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal: .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary, a function call, or an identifier - in that order.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # Parse a function call; `functions` overrides the default FUNCTIONS map,
        # `anonymous` forces an exp.Anonymous result.
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # e.g. CURRENT_DATE without parentheses.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                # NOTE(review): continues the subquery-predicate branch of
                # _parse_function from the preceding chunk.
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name: keep it as an Anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # One `name type` parameter in a function/UDF signature.
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        # Possibly dot-qualified UDF name, optionally followed by a parameter list.
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # An introducer token followed by a literal (e.g. a charset introducer);
        # falls back to a plain Identifier when no literal follows.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # `name` or `kind.name` session parameter reference.
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Try a lambda `(a, b) -> ...` / `a -> ...`; otherwise rewind and parse
        # DISTINCT or a regular expression.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT; roll back position and clear
            # any errors regardless of the outcome.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Plain identifier with no type and no constraints stays as-is.
        if not kind and not constraints:
            return this
        # NOTE(review): final return of _parse_column_def (body in preceding chunk).
        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT.
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        # GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY (options...);
        # `this` is True for ALWAYS, False for BY DEFAULT.
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            # GENERATED ALWAYS AS (<expr>) — no IDENTITY keyword means the parens
            # hold a generation expression instead of identity options.
            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        # NOT NULL / NOT CASESPECIFIC (the NOT was consumed by the caller's map).
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # Optionally named ([CONSTRAINT name]) column constraint.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Table-level constraint; unnamed constraints go through the shortcut.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        # Collect trailing key-constraint options as plain strings.
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [options]; `match=False` skips the keyword check.
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        # ON DELETE / ON UPDATE actions.
        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a "(" this is a column-level PRIMARY KEY constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Subscripts and brace literals: x[...], {...}.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice: x[:n]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            # NOTE(review): else-branch of _parse_bracket (header in preceding chunk).
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained subscripts: x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [operand] WHEN ... THEN ... [ELSE ...] END
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # IF(cond, true, false) or IF cond THEN true [ELSE false] END.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(part FROM expr); a comma is tolerated in place of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        # ANY_VALUE(expr [HAVING MAX|MIN expr]).
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        # CAST(expr AS type [FORMAT fmt]); `strict` selects Cast vs TryCast.
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: cast to a type given as a string.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS <temporal> FORMAT ...) is rewritten to
                # STR_TO_DATE / STR_TO_TIME with a translated format string.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Coerce each arg so NULL contributes '' instead of nullifying the result.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
3787 if len(args) == 1: 3788 return args[0] 3789 3790 return self.expression( 3791 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3792 ) 3793 3794 def _parse_string_agg(self) -> exp.Expression: 3795 if self._match(TokenType.DISTINCT): 3796 args: t.List[t.Optional[exp.Expression]] = [ 3797 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3798 ] 3799 if self._match(TokenType.COMMA): 3800 args.extend(self._parse_csv(self._parse_conjunction)) 3801 else: 3802 args = self._parse_csv(self._parse_conjunction) 3803 3804 index = self._index 3805 if not self._match(TokenType.R_PAREN): 3806 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3807 return self.expression( 3808 exp.GroupConcat, 3809 this=seq_get(args, 0), 3810 separator=self._parse_order(this=seq_get(args, 1)), 3811 ) 3812 3813 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3814 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3815 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type).
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # Odd number of search/result args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] key [:|VALUE] value pair for JSON_OBJECT.
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MATCH (cols...) AGAINST ('text' [modifier]).
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column of the WITH (...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(substr IN str) or POSITION/LOCATE-style comma arguments.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM([chars FROM] str): what we parsed first was the trim chars.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        # Wrap `this` with FILTER / WITHIN GROUP / OVER (...) window syntax;
        # `alias=True` parses a named window definition instead of OVER.
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE BETWEEN <start> AND <end> frame specification.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a window frame: UNBOUNDED / CURRENT ROW / <expr>, plus
        # the PRECEDING/FOLLOWING side keyword.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        # NOTE(review): _parse_alias continues past the end of this chunk; the
        # condition below is truncated mid-statement.
        if
self._match(TokenType.L_PAREN): 4164 aliases = self.expression( 4165 exp.Aliases, 4166 this=this, 4167 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4168 ) 4169 self._match_r_paren(aliases) 4170 return aliases 4171 4172 alias = self._parse_id_var(any_token) 4173 4174 if alias: 4175 return self.expression(exp.Alias, this=this, alias=alias) 4176 4177 return this 4178 4179 def _parse_id_var( 4180 self, 4181 any_token: bool = True, 4182 tokens: t.Optional[t.Collection[TokenType]] = None, 4183 ) -> t.Optional[exp.Expression]: 4184 identifier = self._parse_identifier() 4185 4186 if identifier: 4187 return identifier 4188 4189 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4190 quoted = self._prev.token_type == TokenType.STRING 4191 return exp.Identifier(this=self._prev.text, quoted=quoted) 4192 4193 return None 4194 4195 def _parse_string(self) -> t.Optional[exp.Expression]: 4196 if self._match(TokenType.STRING): 4197 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4198 return self._parse_placeholder() 4199 4200 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4201 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4202 4203 def _parse_number(self) -> t.Optional[exp.Expression]: 4204 if self._match(TokenType.NUMBER): 4205 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4206 return self._parse_placeholder() 4207 4208 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4209 if self._match(TokenType.IDENTIFIER): 4210 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4211 return self._parse_placeholder() 4212 4213 def _parse_var( 4214 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4215 ) -> t.Optional[exp.Expression]: 4216 if ( 4217 (any_token and self._advance_any()) 4218 or self._match(TokenType.VAR) 4219 or (self._match_set(tokens) if tokens else False) 
4220 ): 4221 return self.expression(exp.Var, this=self._prev.text) 4222 return self._parse_placeholder() 4223 4224 def _advance_any(self) -> t.Optional[Token]: 4225 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4226 self._advance() 4227 return self._prev 4228 return None 4229 4230 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4231 return self._parse_var() or self._parse_string() 4232 4233 def _parse_null(self) -> t.Optional[exp.Expression]: 4234 if self._match(TokenType.NULL): 4235 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4236 return None 4237 4238 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4239 if self._match(TokenType.TRUE): 4240 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4241 if self._match(TokenType.FALSE): 4242 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4243 return None 4244 4245 def _parse_star(self) -> t.Optional[exp.Expression]: 4246 if self._match(TokenType.STAR): 4247 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4248 return None 4249 4250 def _parse_parameter(self) -> exp.Parameter: 4251 wrapped = self._match(TokenType.L_BRACE) 4252 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4253 self._match(TokenType.R_BRACE) 4254 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4255 4256 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4257 if self._match_set(self.PLACEHOLDER_PARSERS): 4258 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4259 if placeholder: 4260 return placeholder 4261 self._advance(-1) 4262 return None 4263 4264 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4265 if not self._match(TokenType.EXCEPT): 4266 return None 4267 if self._match(TokenType.L_PAREN, advance=False): 4268 return self._parse_wrapped_csv(self._parse_column) 4269 return self._parse_csv(self._parse_column) 4270 4271 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4272 if not self._match(TokenType.REPLACE): 4273 return None 4274 if self._match(TokenType.L_PAREN, advance=False): 4275 return self._parse_wrapped_csv(self._parse_expression) 4276 return self._parse_expressions() 4277 4278 def _parse_csv( 4279 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4280 ) -> t.List[t.Optional[exp.Expression]]: 4281 parse_result = parse_method() 4282 items = [parse_result] if parse_result is not None else [] 4283 4284 while self._match(sep): 4285 self._add_comments(parse_result) 4286 parse_result = parse_method() 4287 if parse_result is not None: 4288 items.append(parse_result) 4289 4290 return items 4291 4292 def _parse_tokens( 4293 self, parse_method: t.Callable, expressions: t.Dict 4294 ) -> t.Optional[exp.Expression]: 4295 this = parse_method() 4296 4297 while self._match_set(expressions): 4298 this = self.expression( 4299 expressions[self._prev.token_type], 4300 this=this, 4301 comments=self._prev_comments, 4302 expression=parse_method(), 4303 ) 4304 4305 return this 4306 4307 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4308 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4309 4310 def _parse_wrapped_csv( 4311 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4312 ) -> t.List[t.Optional[exp.Expression]]: 4313 return self._parse_wrapped( 4314 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4315 ) 4316 4317 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4318 wrapped = self._match(TokenType.L_PAREN) 4319 if not wrapped and not optional: 4320 self.raise_error("Expecting (") 4321 parse_result = parse_method() 4322 if wrapped: 4323 self._match_r_paren() 4324 return parse_result 4325 4326 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4327 return self._parse_csv(self._parse_expression) 4328 
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [<kind>] [TRANSACTION | WORK] [<mode>, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode can span several VAR tokens, e.g. "READ ONLY".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION | WORK] [TO SAVEPOINT x] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK keyword itself has already been consumed.
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <col def> action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT/FOREIGN KEY/PRIMARY KEY action; the introducing
        token has already been consumed and is available as self._prev."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as ADD COLUMN instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse an ALTER [COLUMN] action: DROP/SET DEFAULT or SET DATA TYPE."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as DROP COLUMN instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; anything unrecognized falls back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form <name> = <value> or <name> TO <value>."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment; rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic>, ..."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens: the SET parse was partial, fall back to a Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of ``options`` (each possibly multi-word) and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap them in a raw Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: <this>(<kind>[(<key> <value>, ...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a RANGE(MIN <x> MAX <y>) / RANGE(MAX <y>) property; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk ``trie`` over the upcoming tokens and return the matching parser
        from ``parsers``, or None (restoring the token position) if none matches."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Returns True if the current token matches ``token_type`` (consuming it
        # unless advance=False); attaches pending comments to ``expression``.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()

            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but against a collection of token types.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; consumes both when advance=True.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitive single-token text match against a collection.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitive match of a sequence of token texts; all-or-nothing.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace columns named after lambda variables with bare identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset()
864 def parse( 865 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 )
Logs or raises any found errors, depending on the chosen error level setting.
964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.