# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.FIXEDSTRING, 141 TokenType.FLOAT, 142 TokenType.DOUBLE, 143 TokenType.CHAR, 144 TokenType.NCHAR, 145 TokenType.VARCHAR, 146 TokenType.NVARCHAR, 147 TokenType.TEXT, 148 TokenType.MEDIUMTEXT, 149 TokenType.LONGTEXT, 150 TokenType.MEDIUMBLOB, 151 TokenType.LONGBLOB, 152 TokenType.BINARY, 153 TokenType.VARBINARY, 154 TokenType.JSON, 155 TokenType.JSONB, 156 TokenType.INTERVAL, 157 TokenType.TIME, 158 TokenType.TIMETZ, 159 TokenType.TIMESTAMP, 160 TokenType.TIMESTAMPTZ, 161 TokenType.TIMESTAMPLTZ, 162 TokenType.DATETIME, 163 TokenType.DATETIME64, 164 TokenType.DATE, 165 TokenType.INT4RANGE, 166 TokenType.INT4MULTIRANGE, 167 TokenType.INT8RANGE, 168 TokenType.INT8MULTIRANGE, 169 TokenType.NUMRANGE, 170 TokenType.NUMMULTIRANGE, 171 TokenType.TSRANGE, 172 TokenType.TSMULTIRANGE, 173 TokenType.TSTZRANGE, 174 TokenType.TSTZMULTIRANGE, 175 TokenType.DATERANGE, 176 TokenType.DATEMULTIRANGE, 177 TokenType.DECIMAL, 178 TokenType.BIGDECIMAL, 179 TokenType.UUID, 180 TokenType.GEOGRAPHY, 181 TokenType.GEOMETRY, 182 TokenType.HLLSKETCH, 183 TokenType.HSTORE, 184 TokenType.PSEUDO_TYPE, 185 TokenType.SUPER, 186 TokenType.SERIAL, 187 TokenType.SMALLSERIAL, 188 TokenType.BIGSERIAL, 189 TokenType.XML, 190 TokenType.YEAR, 191 TokenType.UNIQUEIDENTIFIER, 192 TokenType.USERDEFINED, 193 TokenType.MONEY, 194 TokenType.SMALLMONEY, 195 TokenType.ROWVERSION, 196 TokenType.IMAGE, 197 TokenType.VARIANT, 198 TokenType.OBJECT, 199 TokenType.OBJECT_IDENTIFIER, 200 TokenType.INET, 201 TokenType.IPADDRESS, 202 TokenType.IPPREFIX, 203 TokenType.UNKNOWN, 204 TokenType.NULL, 205 *ENUM_TYPE_TOKENS, 206 *NESTED_TYPE_TOKENS, 207 } 208 209 SUBQUERY_PREDICATES = { 210 TokenType.ANY: exp.Any, 211 TokenType.ALL: exp.All, 212 TokenType.EXISTS: exp.Exists, 213 TokenType.SOME: exp.Any, 214 } 215 216 RESERVED_KEYWORDS = { 217 *Tokenizer.SINGLE_TOKENS.values(), 218 TokenType.SELECT, 219 } 220 221 DB_CREATABLES = { 
222 TokenType.DATABASE, 223 TokenType.SCHEMA, 224 TokenType.TABLE, 225 TokenType.VIEW, 226 TokenType.DICTIONARY, 227 } 228 229 CREATABLES = { 230 TokenType.COLUMN, 231 TokenType.FUNCTION, 232 TokenType.INDEX, 233 TokenType.PROCEDURE, 234 *DB_CREATABLES, 235 } 236 237 # Tokens that can represent identifiers 238 ID_VAR_TOKENS = { 239 TokenType.VAR, 240 TokenType.ANTI, 241 TokenType.APPLY, 242 TokenType.ASC, 243 TokenType.AUTO_INCREMENT, 244 TokenType.BEGIN, 245 TokenType.CACHE, 246 TokenType.CASE, 247 TokenType.COLLATE, 248 TokenType.COMMAND, 249 TokenType.COMMENT, 250 TokenType.COMMIT, 251 TokenType.CONSTRAINT, 252 TokenType.DEFAULT, 253 TokenType.DELETE, 254 TokenType.DESC, 255 TokenType.DESCRIBE, 256 TokenType.DICTIONARY, 257 TokenType.DIV, 258 TokenType.END, 259 TokenType.EXECUTE, 260 TokenType.ESCAPE, 261 TokenType.FALSE, 262 TokenType.FIRST, 263 TokenType.FILTER, 264 TokenType.FORMAT, 265 TokenType.FULL, 266 TokenType.IS, 267 TokenType.ISNULL, 268 TokenType.INTERVAL, 269 TokenType.KEEP, 270 TokenType.LEFT, 271 TokenType.LOAD, 272 TokenType.MERGE, 273 TokenType.NATURAL, 274 TokenType.NEXT, 275 TokenType.OFFSET, 276 TokenType.ORDINALITY, 277 TokenType.OVERWRITE, 278 TokenType.PARTITION, 279 TokenType.PERCENT, 280 TokenType.PIVOT, 281 TokenType.PRAGMA, 282 TokenType.RANGE, 283 TokenType.REFERENCES, 284 TokenType.RIGHT, 285 TokenType.ROW, 286 TokenType.ROWS, 287 TokenType.SEMI, 288 TokenType.SET, 289 TokenType.SETTINGS, 290 TokenType.SHOW, 291 TokenType.TEMPORARY, 292 TokenType.TOP, 293 TokenType.TRUE, 294 TokenType.UNIQUE, 295 TokenType.UNPIVOT, 296 TokenType.UPDATE, 297 TokenType.VOLATILE, 298 TokenType.WINDOW, 299 *CREATABLES, 300 *SUBQUERY_PREDICATES, 301 *TYPE_TOKENS, 302 *NO_PAREN_FUNCTIONS, 303 } 304 305 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 306 307 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 308 TokenType.APPLY, 309 TokenType.ASOF, 310 TokenType.FULL, 311 TokenType.LEFT, 312 TokenType.LOCK, 313 TokenType.NATURAL, 314 TokenType.OFFSET, 315 
TokenType.RIGHT, 316 TokenType.WINDOW, 317 } 318 319 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 320 321 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 322 323 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 324 325 FUNC_TOKENS = { 326 TokenType.COMMAND, 327 TokenType.CURRENT_DATE, 328 TokenType.CURRENT_DATETIME, 329 TokenType.CURRENT_TIMESTAMP, 330 TokenType.CURRENT_TIME, 331 TokenType.CURRENT_USER, 332 TokenType.FILTER, 333 TokenType.FIRST, 334 TokenType.FORMAT, 335 TokenType.GLOB, 336 TokenType.IDENTIFIER, 337 TokenType.INDEX, 338 TokenType.ISNULL, 339 TokenType.ILIKE, 340 TokenType.INSERT, 341 TokenType.LIKE, 342 TokenType.MERGE, 343 TokenType.OFFSET, 344 TokenType.PRIMARY_KEY, 345 TokenType.RANGE, 346 TokenType.REPLACE, 347 TokenType.RLIKE, 348 TokenType.ROW, 349 TokenType.UNNEST, 350 TokenType.VAR, 351 TokenType.LEFT, 352 TokenType.RIGHT, 353 TokenType.DATE, 354 TokenType.DATETIME, 355 TokenType.TABLE, 356 TokenType.TIMESTAMP, 357 TokenType.TIMESTAMPTZ, 358 TokenType.WINDOW, 359 TokenType.XOR, 360 *TYPE_TOKENS, 361 *SUBQUERY_PREDICATES, 362 } 363 364 CONJUNCTION = { 365 TokenType.AND: exp.And, 366 TokenType.OR: exp.Or, 367 } 368 369 EQUALITY = { 370 TokenType.EQ: exp.EQ, 371 TokenType.NEQ: exp.NEQ, 372 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 373 } 374 375 COMPARISON = { 376 TokenType.GT: exp.GT, 377 TokenType.GTE: exp.GTE, 378 TokenType.LT: exp.LT, 379 TokenType.LTE: exp.LTE, 380 } 381 382 BITWISE = { 383 TokenType.AMP: exp.BitwiseAnd, 384 TokenType.CARET: exp.BitwiseXor, 385 TokenType.PIPE: exp.BitwiseOr, 386 TokenType.DPIPE: exp.DPipe, 387 } 388 389 TERM = { 390 TokenType.DASH: exp.Sub, 391 TokenType.PLUS: exp.Add, 392 TokenType.MOD: exp.Mod, 393 TokenType.COLLATE: exp.Collate, 394 } 395 396 FACTOR = { 397 TokenType.DIV: exp.IntDiv, 398 TokenType.LR_ARROW: exp.Distance, 399 TokenType.SLASH: exp.Div, 400 TokenType.STAR: exp.Mul, 401 } 402 403 TIMES = { 404 TokenType.TIME, 405 TokenType.TIMETZ, 406 } 407 408 TIMESTAMPS = { 409 
TokenType.TIMESTAMP, 410 TokenType.TIMESTAMPTZ, 411 TokenType.TIMESTAMPLTZ, 412 *TIMES, 413 } 414 415 SET_OPERATIONS = { 416 TokenType.UNION, 417 TokenType.INTERSECT, 418 TokenType.EXCEPT, 419 } 420 421 JOIN_METHODS = { 422 TokenType.NATURAL, 423 TokenType.ASOF, 424 } 425 426 JOIN_SIDES = { 427 TokenType.LEFT, 428 TokenType.RIGHT, 429 TokenType.FULL, 430 } 431 432 JOIN_KINDS = { 433 TokenType.INNER, 434 TokenType.OUTER, 435 TokenType.CROSS, 436 TokenType.SEMI, 437 TokenType.ANTI, 438 } 439 440 JOIN_HINTS: t.Set[str] = set() 441 442 LAMBDAS = { 443 TokenType.ARROW: lambda self, expressions: self.expression( 444 exp.Lambda, 445 this=self._replace_lambda( 446 self._parse_conjunction(), 447 {node.name for node in expressions}, 448 ), 449 expressions=expressions, 450 ), 451 TokenType.FARROW: lambda self, expressions: self.expression( 452 exp.Kwarg, 453 this=exp.var(expressions[0].name), 454 expression=self._parse_conjunction(), 455 ), 456 } 457 458 COLUMN_OPERATORS = { 459 TokenType.DOT: None, 460 TokenType.DCOLON: lambda self, this, to: self.expression( 461 exp.Cast if self.STRICT_CAST else exp.TryCast, 462 this=this, 463 to=to, 464 ), 465 TokenType.ARROW: lambda self, this, path: self.expression( 466 exp.JSONExtract, 467 this=this, 468 expression=path, 469 ), 470 TokenType.DARROW: lambda self, this, path: self.expression( 471 exp.JSONExtractScalar, 472 this=this, 473 expression=path, 474 ), 475 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 476 exp.JSONBExtract, 477 this=this, 478 expression=path, 479 ), 480 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 481 exp.JSONBExtractScalar, 482 this=this, 483 expression=path, 484 ), 485 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 486 exp.JSONBContains, 487 this=this, 488 expression=key, 489 ), 490 } 491 492 EXPRESSION_PARSERS = { 493 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 494 exp.Column: lambda self: self._parse_column(), 495 
exp.Condition: lambda self: self._parse_conjunction(), 496 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 497 exp.Expression: lambda self: self._parse_statement(), 498 exp.From: lambda self: self._parse_from(), 499 exp.Group: lambda self: self._parse_group(), 500 exp.Having: lambda self: self._parse_having(), 501 exp.Identifier: lambda self: self._parse_id_var(), 502 exp.Join: lambda self: self._parse_join(), 503 exp.Lambda: lambda self: self._parse_lambda(), 504 exp.Lateral: lambda self: self._parse_lateral(), 505 exp.Limit: lambda self: self._parse_limit(), 506 exp.Offset: lambda self: self._parse_offset(), 507 exp.Order: lambda self: self._parse_order(), 508 exp.Ordered: lambda self: self._parse_ordered(), 509 exp.Properties: lambda self: self._parse_properties(), 510 exp.Qualify: lambda self: self._parse_qualify(), 511 exp.Returning: lambda self: self._parse_returning(), 512 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 513 exp.Table: lambda self: self._parse_table_parts(), 514 exp.TableAlias: lambda self: self._parse_table_alias(), 515 exp.Where: lambda self: self._parse_where(), 516 exp.Window: lambda self: self._parse_named_window(), 517 exp.With: lambda self: self._parse_with(), 518 "JOIN_TYPE": lambda self: self._parse_join_parts(), 519 } 520 521 STATEMENT_PARSERS = { 522 TokenType.ALTER: lambda self: self._parse_alter(), 523 TokenType.BEGIN: lambda self: self._parse_transaction(), 524 TokenType.CACHE: lambda self: self._parse_cache(), 525 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 526 TokenType.COMMENT: lambda self: self._parse_comment(), 527 TokenType.CREATE: lambda self: self._parse_create(), 528 TokenType.DELETE: lambda self: self._parse_delete(), 529 TokenType.DESC: lambda self: self._parse_describe(), 530 TokenType.DESCRIBE: lambda self: self._parse_describe(), 531 TokenType.DROP: lambda self: self._parse_drop(), 532 TokenType.INSERT: lambda self: self._parse_insert(), 533 
TokenType.LOAD: lambda self: self._parse_load(), 534 TokenType.MERGE: lambda self: self._parse_merge(), 535 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 536 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 537 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 538 TokenType.SET: lambda self: self._parse_set(), 539 TokenType.UNCACHE: lambda self: self._parse_uncache(), 540 TokenType.UPDATE: lambda self: self._parse_update(), 541 TokenType.USE: lambda self: self.expression( 542 exp.Use, 543 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 544 and exp.var(self._prev.text), 545 this=self._parse_table(schema=False), 546 ), 547 } 548 549 UNARY_PARSERS = { 550 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 551 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 552 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 553 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 554 } 555 556 PRIMARY_PARSERS = { 557 TokenType.STRING: lambda self, token: self.expression( 558 exp.Literal, this=token.text, is_string=True 559 ), 560 TokenType.NUMBER: lambda self, token: self.expression( 561 exp.Literal, this=token.text, is_string=False 562 ), 563 TokenType.STAR: lambda self, _: self.expression( 564 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 565 ), 566 TokenType.NULL: lambda self, _: self.expression(exp.Null), 567 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 568 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 569 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 570 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 571 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, 
this=token.text), 572 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 573 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 574 exp.National, this=token.text 575 ), 576 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 577 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 578 } 579 580 PLACEHOLDER_PARSERS = { 581 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 582 TokenType.PARAMETER: lambda self: self._parse_parameter(), 583 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 584 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 585 else None, 586 } 587 588 RANGE_PARSERS = { 589 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 590 TokenType.GLOB: binary_range_parser(exp.Glob), 591 TokenType.ILIKE: binary_range_parser(exp.ILike), 592 TokenType.IN: lambda self, this: self._parse_in(this), 593 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 594 TokenType.IS: lambda self, this: self._parse_is(this), 595 TokenType.LIKE: binary_range_parser(exp.Like), 596 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 597 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 598 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 599 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 600 } 601 602 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 603 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 604 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 605 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 606 "CHARACTER SET": lambda self: self._parse_character_set(), 607 "CHECKSUM": lambda self: self._parse_checksum(), 608 "CLUSTER BY": lambda self: self._parse_cluster(), 609 "CLUSTERED": lambda self: self._parse_clustered_by(), 610 "COLLATE": lambda self: 
self._parse_property_assignment(exp.CollateProperty), 611 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 612 "COPY": lambda self: self._parse_copy_property(), 613 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 614 "DEFINER": lambda self: self._parse_definer(), 615 "DETERMINISTIC": lambda self: self.expression( 616 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 617 ), 618 "DISTKEY": lambda self: self._parse_distkey(), 619 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 620 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 621 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 622 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 623 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 624 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 625 "FREESPACE": lambda self: self._parse_freespace(), 626 "HEAP": lambda self: self.expression(exp.HeapProperty), 627 "IMMUTABLE": lambda self: self.expression( 628 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 629 ), 630 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 631 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 632 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 633 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 634 "LIKE": lambda self: self._parse_create_like(), 635 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 636 "LOCK": lambda self: self._parse_locking(), 637 "LOCKING": lambda self: self._parse_locking(), 638 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 639 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 640 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 641 "MULTISET": lambda self: 
self.expression(exp.SetProperty, multi=True), 642 "NO": lambda self: self._parse_no_property(), 643 "ON": lambda self: self._parse_on_property(), 644 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 645 "PARTITION BY": lambda self: self._parse_partitioned_by(), 646 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 647 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 648 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 649 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 650 "RETURNS": lambda self: self._parse_returns(), 651 "ROW": lambda self: self._parse_row(), 652 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 653 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 654 "SETTINGS": lambda self: self.expression( 655 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 656 ), 657 "SORTKEY": lambda self: self._parse_sortkey(), 658 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 659 "STABLE": lambda self: self.expression( 660 exp.StabilityProperty, this=exp.Literal.string("STABLE") 661 ), 662 "STORED": lambda self: self._parse_stored(), 663 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 664 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 665 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 666 "TO": lambda self: self._parse_to_table(), 667 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 668 "TTL": lambda self: self._parse_ttl(), 669 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 670 "VOLATILE": lambda self: self._parse_volatile_property(), 671 "WITH": lambda self: self._parse_with_property(), 672 } 673 674 CONSTRAINT_PARSERS = { 675 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 676 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 677 "CASESPECIFIC": lambda self: 
self.expression(exp.CaseSpecificColumnConstraint, not_=False), 678 "CHARACTER SET": lambda self: self.expression( 679 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 680 ), 681 "CHECK": lambda self: self.expression( 682 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 683 ), 684 "COLLATE": lambda self: self.expression( 685 exp.CollateColumnConstraint, this=self._parse_var() 686 ), 687 "COMMENT": lambda self: self.expression( 688 exp.CommentColumnConstraint, this=self._parse_string() 689 ), 690 "COMPRESS": lambda self: self._parse_compress(), 691 "CLUSTERED": lambda self: self.expression( 692 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 693 ), 694 "NONCLUSTERED": lambda self: self.expression( 695 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 696 ), 697 "DEFAULT": lambda self: self.expression( 698 exp.DefaultColumnConstraint, this=self._parse_bitwise() 699 ), 700 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 701 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 702 "FORMAT": lambda self: self.expression( 703 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 704 ), 705 "GENERATED": lambda self: self._parse_generated_as_identity(), 706 "IDENTITY": lambda self: self._parse_auto_increment(), 707 "INLINE": lambda self: self._parse_inline(), 708 "LIKE": lambda self: self._parse_create_like(), 709 "NOT": lambda self: self._parse_not_constraint(), 710 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 711 "ON": lambda self: ( 712 self._match(TokenType.UPDATE) 713 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 714 ) 715 or self.expression(exp.OnProperty, this=self._parse_id_var()), 716 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 717 "PRIMARY KEY": lambda self: 
self._parse_primary_key(), 718 "REFERENCES": lambda self: self._parse_references(match=False), 719 "TITLE": lambda self: self.expression( 720 exp.TitleColumnConstraint, this=self._parse_var_or_string() 721 ), 722 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 723 "UNIQUE": lambda self: self._parse_unique(), 724 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 725 "WITH": lambda self: self.expression( 726 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 727 ), 728 } 729 730 ALTER_PARSERS = { 731 "ADD": lambda self: self._parse_alter_table_add(), 732 "ALTER": lambda self: self._parse_alter_table_alter(), 733 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 734 "DROP": lambda self: self._parse_alter_table_drop(), 735 "RENAME": lambda self: self._parse_alter_table_rename(), 736 } 737 738 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 739 740 NO_PAREN_FUNCTION_PARSERS = { 741 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 742 "CASE": lambda self: self._parse_case(), 743 "IF": lambda self: self._parse_if(), 744 "NEXT": lambda self: self._parse_next_value_for(), 745 } 746 747 INVALID_FUNC_NAME_TOKENS = { 748 TokenType.IDENTIFIER, 749 TokenType.STRING, 750 } 751 752 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 753 754 FUNCTION_PARSERS = { 755 "ANY_VALUE": lambda self: self._parse_any_value(), 756 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 757 "CONCAT": lambda self: self._parse_concat(), 758 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 759 "DECODE": lambda self: self._parse_decode(), 760 "EXTRACT": lambda self: self._parse_extract(), 761 "JSON_OBJECT": lambda self: self._parse_json_object(), 762 "LOG": lambda self: self._parse_logarithm(), 763 "MATCH": lambda self: self._parse_match_against(), 764 "OPENJSON": lambda self: self._parse_open_json(), 765 
"POSITION": lambda self: self._parse_position(), 766 "SAFE_CAST": lambda self: self._parse_cast(False), 767 "STRING_AGG": lambda self: self._parse_string_agg(), 768 "SUBSTRING": lambda self: self._parse_substring(), 769 "TRIM": lambda self: self._parse_trim(), 770 "TRY_CAST": lambda self: self._parse_cast(False), 771 "TRY_CONVERT": lambda self: self._parse_convert(False), 772 } 773 774 QUERY_MODIFIER_PARSERS = { 775 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 776 TokenType.WHERE: lambda self: ("where", self._parse_where()), 777 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 778 TokenType.HAVING: lambda self: ("having", self._parse_having()), 779 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 780 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 781 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 782 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 783 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 784 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 785 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 786 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 787 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 788 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 789 TokenType.CLUSTER_BY: lambda self: ( 790 "cluster", 791 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 792 ), 793 TokenType.DISTRIBUTE_BY: lambda self: ( 794 "distribute", 795 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 796 ), 797 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 798 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 799 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 800 } 801 802 SET_PARSERS = { 803 
"GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 804 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 805 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 806 "TRANSACTION": lambda self: self._parse_set_transaction(), 807 } 808 809 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 810 811 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 812 813 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 814 815 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 816 817 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 818 819 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 820 TRANSACTION_CHARACTERISTICS = { 821 "ISOLATION LEVEL REPEATABLE READ", 822 "ISOLATION LEVEL READ COMMITTED", 823 "ISOLATION LEVEL READ UNCOMMITTED", 824 "ISOLATION LEVEL SERIALIZABLE", 825 "READ WRITE", 826 "READ ONLY", 827 } 828 829 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 830 831 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 832 833 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 834 835 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 836 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 837 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 838 839 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 840 841 DISTINCT_TOKENS = {TokenType.DISTINCT} 842 843 STRICT_CAST = True 844 845 # A NULL arg in CONCAT yields NULL by default 846 CONCAT_NULL_OUTPUTS_STRING = False 847 848 PREFIXED_PIVOT_COLUMNS = False 849 IDENTIFY_PIVOT_STRINGS = False 850 851 LOG_BASE_FIRST = True 852 LOG_DEFAULTS_TO_LN = False 853 854 SUPPORTS_USER_DEFINED_TYPES = True 855 856 # Whether or not ADD is present for each column added by ALTER TABLE 857 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 858 859 __slots__ = ( 860 "error_level", 861 "error_message_context", 862 "max_errors", 863 "sql", 864 "errors", 865 
"_tokens", 866 "_index", 867 "_curr", 868 "_next", 869 "_prev", 870 "_prev_comments", 871 "_tokenizer", 872 ) 873 874 # Autofilled 875 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 876 INDEX_OFFSET: int = 0 877 UNNEST_COLUMN_ONLY: bool = False 878 ALIAS_POST_TABLESAMPLE: bool = False 879 STRICT_STRING_CONCAT = False 880 NORMALIZE_FUNCTIONS = "upper" 881 NULL_ORDERING: str = "nulls_are_small" 882 SHOW_TRIE: t.Dict = {} 883 SET_TRIE: t.Dict = {} 884 FORMAT_MAPPING: t.Dict[str, str] = {} 885 FORMAT_TRIE: t.Dict = {} 886 TIME_MAPPING: t.Dict[str, str] = {} 887 TIME_TRIE: t.Dict = {} 888 889 def __init__( 890 self, 891 error_level: t.Optional[ErrorLevel] = None, 892 error_message_context: int = 100, 893 max_errors: int = 3, 894 ): 895 self.error_level = error_level or ErrorLevel.IMMEDIATE 896 self.error_message_context = error_message_context 897 self.max_errors = max_errors 898 self._tokenizer = self.TOKENIZER_CLASS() 899 self.reset() 900 901 def reset(self): 902 self.sql = "" 903 self.errors = [] 904 self._tokens = [] 905 self._index = 0 906 self._curr = None 907 self._next = None 908 self._prev = None 909 self._prev_comments = None 910 911 def parse( 912 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 913 ) -> t.List[t.Optional[exp.Expression]]: 914 """ 915 Parses a list of tokens and returns a list of syntax trees, one tree 916 per parsed SQL statement. 917 918 Args: 919 raw_tokens: The list of tokens. 920 sql: The original SQL string, used to produce helpful debug messages. 921 922 Returns: 923 The list of the produced syntax trees. 924 """ 925 return self._parse( 926 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 927 ) 928 929 def parse_into( 930 self, 931 expression_types: exp.IntoType, 932 raw_tokens: t.List[Token], 933 sql: t.Optional[str] = None, 934 ) -> t.List[t.Optional[exp.Expression]]: 935 """ 936 Parses a list of tokens into a given Expression type. 
If a collection of Expression 937 types is given instead, this method will try to parse the token list into each one 938 of them, stopping at the first for which the parsing succeeds. 939 940 Args: 941 expression_types: The expression type(s) to try and parse the token list into. 942 raw_tokens: The list of tokens. 943 sql: The original SQL string, used to produce helpful debug messages. 944 945 Returns: 946 The target Expression. 947 """ 948 errors = [] 949 for expression_type in ensure_list(expression_types): 950 parser = self.EXPRESSION_PARSERS.get(expression_type) 951 if not parser: 952 raise TypeError(f"No parser registered for {expression_type}") 953 954 try: 955 return self._parse(parser, raw_tokens, sql) 956 except ParseError as e: 957 e.errors[0]["into_expression"] = expression_type 958 errors.append(e) 959 960 raise ParseError( 961 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 962 errors=merge_errors(errors), 963 ) from errors[-1] 964 965 def _parse( 966 self, 967 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 968 raw_tokens: t.List[Token], 969 sql: t.Optional[str] = None, 970 ) -> t.List[t.Optional[exp.Expression]]: 971 self.reset() 972 self.sql = sql or "" 973 974 total = len(raw_tokens) 975 chunks: t.List[t.List[Token]] = [[]] 976 977 for i, token in enumerate(raw_tokens): 978 if token.token_type == TokenType.SEMICOLON: 979 if i < total - 1: 980 chunks.append([]) 981 else: 982 chunks[-1].append(token) 983 984 expressions = [] 985 986 for tokens in chunks: 987 self._index = -1 988 self._tokens = tokens 989 self._advance() 990 991 expressions.append(parse_method(self)) 992 993 if self._index < len(self._tokens): 994 self.raise_error("Invalid expression / Unexpected token") 995 996 self.check_errors() 997 998 return expressions 999 1000 def check_errors(self) -> None: 1001 """Logs or raises any found errors, depending on the chosen error level setting.""" 1002 if self.error_level == ErrorLevel.WARN: 1003 for error in 
self.errors: 1004 logger.error(str(error)) 1005 elif self.error_level == ErrorLevel.RAISE and self.errors: 1006 raise ParseError( 1007 concat_messages(self.errors, self.max_errors), 1008 errors=merge_errors(self.errors), 1009 ) 1010 1011 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1012 """ 1013 Appends an error in the list of recorded errors or raises it, depending on the chosen 1014 error level setting. 1015 """ 1016 token = token or self._curr or self._prev or Token.string("") 1017 start = token.start 1018 end = token.end + 1 1019 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1020 highlight = self.sql[start:end] 1021 end_context = self.sql[end : end + self.error_message_context] 1022 1023 error = ParseError.new( 1024 f"{message}. Line {token.line}, Col: {token.col}.\n" 1025 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1026 description=message, 1027 line=token.line, 1028 col=token.col, 1029 start_context=start_context, 1030 highlight=highlight, 1031 end_context=end_context, 1032 ) 1033 1034 if self.error_level == ErrorLevel.IMMEDIATE: 1035 raise error 1036 1037 self.errors.append(error) 1038 1039 def expression( 1040 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1041 ) -> E: 1042 """ 1043 Creates a new, validated Expression. 1044 1045 Args: 1046 exp_class: The expression class to instantiate. 1047 comments: An optional list of comments to attach to the expression. 1048 kwargs: The arguments to set for the expression along with their respective values. 1049 1050 Returns: 1051 The target expression. 
1052 """ 1053 instance = exp_class(**kwargs) 1054 instance.add_comments(comments) if comments else self._add_comments(instance) 1055 return self.validate_expression(instance) 1056 1057 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1058 if expression and self._prev_comments: 1059 expression.add_comments(self._prev_comments) 1060 self._prev_comments = None 1061 1062 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1063 """ 1064 Validates an Expression, making sure that all its mandatory arguments are set. 1065 1066 Args: 1067 expression: The expression to validate. 1068 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1069 1070 Returns: 1071 The validated expression. 1072 """ 1073 if self.error_level != ErrorLevel.IGNORE: 1074 for error_message in expression.error_messages(args): 1075 self.raise_error(error_message) 1076 1077 return expression 1078 1079 def _find_sql(self, start: Token, end: Token) -> str: 1080 return self.sql[start.start : end.end + 1] 1081 1082 def _advance(self, times: int = 1) -> None: 1083 self._index += times 1084 self._curr = seq_get(self._tokens, self._index) 1085 self._next = seq_get(self._tokens, self._index + 1) 1086 1087 if self._index > 0: 1088 self._prev = self._tokens[self._index - 1] 1089 self._prev_comments = self._prev.comments 1090 else: 1091 self._prev = None 1092 self._prev_comments = None 1093 1094 def _retreat(self, index: int) -> None: 1095 if index != self._index: 1096 self._advance(index - self._index) 1097 1098 def _parse_command(self) -> exp.Command: 1099 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1100 1101 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1102 start = self._prev 1103 exists = self._parse_exists() if allow_exists else None 1104 1105 self._match(TokenType.ON) 1106 1107 kind = self._match_set(self.CREATABLES) and self._prev 1108 if not 
kind: 1109 return self._parse_as_command(start) 1110 1111 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1112 this = self._parse_user_defined_function(kind=kind.token_type) 1113 elif kind.token_type == TokenType.TABLE: 1114 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1115 elif kind.token_type == TokenType.COLUMN: 1116 this = self._parse_column() 1117 else: 1118 this = self._parse_id_var() 1119 1120 self._match(TokenType.IS) 1121 1122 return self.expression( 1123 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1124 ) 1125 1126 def _parse_to_table( 1127 self, 1128 ) -> exp.ToTableProperty: 1129 table = self._parse_table_parts(schema=True) 1130 return self.expression(exp.ToTableProperty, this=table) 1131 1132 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1133 def _parse_ttl(self) -> exp.Expression: 1134 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1135 this = self._parse_bitwise() 1136 1137 if self._match_text_seq("DELETE"): 1138 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1139 if self._match_text_seq("RECOMPRESS"): 1140 return self.expression( 1141 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1142 ) 1143 if self._match_text_seq("TO", "DISK"): 1144 return self.expression( 1145 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1146 ) 1147 if self._match_text_seq("TO", "VOLUME"): 1148 return self.expression( 1149 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1150 ) 1151 1152 return this 1153 1154 expressions = self._parse_csv(_parse_ttl_action) 1155 where = self._parse_where() 1156 group = self._parse_group() 1157 1158 aggregates = None 1159 if group and self._match(TokenType.SET): 1160 aggregates = self._parse_csv(self._parse_set_item) 1161 1162 return self.expression( 1163 exp.MergeTreeTTL, 1164 expressions=expressions, 1165 where=where, 
1166 group=group, 1167 aggregates=aggregates, 1168 ) 1169 1170 def _parse_statement(self) -> t.Optional[exp.Expression]: 1171 if self._curr is None: 1172 return None 1173 1174 if self._match_set(self.STATEMENT_PARSERS): 1175 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1176 1177 if self._match_set(Tokenizer.COMMANDS): 1178 return self._parse_command() 1179 1180 expression = self._parse_expression() 1181 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1182 return self._parse_query_modifiers(expression) 1183 1184 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1185 start = self._prev 1186 temporary = self._match(TokenType.TEMPORARY) 1187 materialized = self._match_text_seq("MATERIALIZED") 1188 1189 kind = self._match_set(self.CREATABLES) and self._prev.text 1190 if not kind: 1191 return self._parse_as_command(start) 1192 1193 return self.expression( 1194 exp.Drop, 1195 comments=start.comments, 1196 exists=exists or self._parse_exists(), 1197 this=self._parse_table(schema=True), 1198 kind=kind, 1199 temporary=temporary, 1200 materialized=materialized, 1201 cascade=self._match_text_seq("CASCADE"), 1202 constraints=self._match_text_seq("CONSTRAINTS"), 1203 purge=self._match_text_seq("PURGE"), 1204 ) 1205 1206 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1207 return ( 1208 self._match_text_seq("IF") 1209 and (not not_ or self._match(TokenType.NOT)) 1210 and self._match(TokenType.EXISTS) 1211 ) 1212 1213 def _parse_create(self) -> exp.Create | exp.Command: 1214 # Note: this can't be None because we've matched a statement parser 1215 start = self._prev 1216 comments = self._prev_comments 1217 1218 replace = start.text.upper() == "REPLACE" or self._match_pair( 1219 TokenType.OR, TokenType.REPLACE 1220 ) 1221 unique = self._match(TokenType.UNIQUE) 1222 1223 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1224 self._advance() 1225 1226 properties = 
None 1227 create_token = self._match_set(self.CREATABLES) and self._prev 1228 1229 if not create_token: 1230 # exp.Properties.Location.POST_CREATE 1231 properties = self._parse_properties() 1232 create_token = self._match_set(self.CREATABLES) and self._prev 1233 1234 if not properties or not create_token: 1235 return self._parse_as_command(start) 1236 1237 exists = self._parse_exists(not_=True) 1238 this = None 1239 expression: t.Optional[exp.Expression] = None 1240 indexes = None 1241 no_schema_binding = None 1242 begin = None 1243 clone = None 1244 1245 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1246 nonlocal properties 1247 if properties and temp_props: 1248 properties.expressions.extend(temp_props.expressions) 1249 elif temp_props: 1250 properties = temp_props 1251 1252 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1253 this = self._parse_user_defined_function(kind=create_token.token_type) 1254 1255 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1256 extend_props(self._parse_properties()) 1257 1258 self._match(TokenType.ALIAS) 1259 1260 if self._match(TokenType.COMMAND): 1261 expression = self._parse_as_command(self._prev) 1262 else: 1263 begin = self._match(TokenType.BEGIN) 1264 return_ = self._match_text_seq("RETURN") 1265 expression = self._parse_statement() 1266 1267 if return_: 1268 expression = self.expression(exp.Return, this=expression) 1269 elif create_token.token_type == TokenType.INDEX: 1270 this = self._parse_index(index=self._parse_id_var()) 1271 elif create_token.token_type in self.DB_CREATABLES: 1272 table_parts = self._parse_table_parts(schema=True) 1273 1274 # exp.Properties.Location.POST_NAME 1275 self._match(TokenType.COMMA) 1276 extend_props(self._parse_properties(before=True)) 1277 1278 this = self._parse_schema(this=table_parts) 1279 1280 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1281 extend_props(self._parse_properties()) 1282 1283 
self._match(TokenType.ALIAS) 1284 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1285 # exp.Properties.Location.POST_ALIAS 1286 extend_props(self._parse_properties()) 1287 1288 expression = self._parse_ddl_select() 1289 1290 if create_token.token_type == TokenType.TABLE: 1291 # exp.Properties.Location.POST_EXPRESSION 1292 extend_props(self._parse_properties()) 1293 1294 indexes = [] 1295 while True: 1296 index = self._parse_index() 1297 1298 # exp.Properties.Location.POST_INDEX 1299 extend_props(self._parse_properties()) 1300 1301 if not index: 1302 break 1303 else: 1304 self._match(TokenType.COMMA) 1305 indexes.append(index) 1306 elif create_token.token_type == TokenType.VIEW: 1307 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1308 no_schema_binding = True 1309 1310 shallow = self._match_text_seq("SHALLOW") 1311 1312 if self._match_text_seq("CLONE"): 1313 clone = self._parse_table(schema=True) 1314 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1315 clone_kind = ( 1316 self._match(TokenType.L_PAREN) 1317 and self._match_texts(self.CLONE_KINDS) 1318 and self._prev.text.upper() 1319 ) 1320 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1321 self._match(TokenType.R_PAREN) 1322 clone = self.expression( 1323 exp.Clone, 1324 this=clone, 1325 when=when, 1326 kind=clone_kind, 1327 shallow=shallow, 1328 expression=clone_expression, 1329 ) 1330 1331 return self.expression( 1332 exp.Create, 1333 comments=comments, 1334 this=this, 1335 kind=create_token.text, 1336 replace=replace, 1337 unique=unique, 1338 expression=expression, 1339 exists=exists, 1340 properties=properties, 1341 indexes=indexes, 1342 no_schema_binding=no_schema_binding, 1343 begin=begin, 1344 clone=clone, 1345 ) 1346 1347 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1348 # only used for teradata currently 1349 self._match(TokenType.COMMA) 1350 1351 kwargs = { 1352 "no": self._match_text_seq("NO"), 1353 
"dual": self._match_text_seq("DUAL"), 1354 "before": self._match_text_seq("BEFORE"), 1355 "default": self._match_text_seq("DEFAULT"), 1356 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1357 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1358 "after": self._match_text_seq("AFTER"), 1359 "minimum": self._match_texts(("MIN", "MINIMUM")), 1360 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1361 } 1362 1363 if self._match_texts(self.PROPERTY_PARSERS): 1364 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1365 try: 1366 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1367 except TypeError: 1368 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1369 1370 return None 1371 1372 def _parse_property(self) -> t.Optional[exp.Expression]: 1373 if self._match_texts(self.PROPERTY_PARSERS): 1374 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1375 1376 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1377 return self._parse_character_set(default=True) 1378 1379 if self._match_text_seq("COMPOUND", "SORTKEY"): 1380 return self._parse_sortkey(compound=True) 1381 1382 if self._match_text_seq("SQL", "SECURITY"): 1383 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1384 1385 assignment = self._match_pair( 1386 TokenType.VAR, TokenType.EQ, advance=False 1387 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1388 1389 if assignment: 1390 key = self._parse_var_or_string() 1391 self._match(TokenType.EQ) 1392 return self.expression( 1393 exp.Property, 1394 this=key, 1395 value=self._parse_column() or self._parse_var(any_token=True), 1396 ) 1397 1398 return None 1399 1400 def _parse_stored(self) -> exp.FileFormatProperty: 1401 self._match(TokenType.ALIAS) 1402 1403 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1404 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1405 1406 
return self.expression( 1407 exp.FileFormatProperty, 1408 this=self.expression( 1409 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1410 ) 1411 if input_format or output_format 1412 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1413 ) 1414 1415 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1416 self._match(TokenType.EQ) 1417 self._match(TokenType.ALIAS) 1418 return self.expression(exp_class, this=self._parse_field()) 1419 1420 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1421 properties = [] 1422 while True: 1423 if before: 1424 prop = self._parse_property_before() 1425 else: 1426 prop = self._parse_property() 1427 1428 if not prop: 1429 break 1430 for p in ensure_list(prop): 1431 properties.append(p) 1432 1433 if properties: 1434 return self.expression(exp.Properties, expressions=properties) 1435 1436 return None 1437 1438 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1439 return self.expression( 1440 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1441 ) 1442 1443 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1444 if self._index >= 2: 1445 pre_volatile_token = self._tokens[self._index - 2] 1446 else: 1447 pre_volatile_token = None 1448 1449 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1450 return exp.VolatileProperty() 1451 1452 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1453 1454 def _parse_with_property( 1455 self, 1456 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1457 if self._match(TokenType.L_PAREN, advance=False): 1458 return self._parse_wrapped_csv(self._parse_property) 1459 1460 if self._match_text_seq("JOURNAL"): 1461 return self._parse_withjournaltable() 1462 1463 if self._match_text_seq("DATA"): 1464 return self._parse_withdata(no=False) 1465 
elif self._match_text_seq("NO", "DATA"): 1466 return self._parse_withdata(no=True) 1467 1468 if not self._next: 1469 return None 1470 1471 return self._parse_withisolatedloading() 1472 1473 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1474 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1475 self._match(TokenType.EQ) 1476 1477 user = self._parse_id_var() 1478 self._match(TokenType.PARAMETER) 1479 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1480 1481 if not user or not host: 1482 return None 1483 1484 return exp.DefinerProperty(this=f"{user}@{host}") 1485 1486 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1487 self._match(TokenType.TABLE) 1488 self._match(TokenType.EQ) 1489 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1490 1491 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1492 return self.expression(exp.LogProperty, no=no) 1493 1494 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1495 return self.expression(exp.JournalProperty, **kwargs) 1496 1497 def _parse_checksum(self) -> exp.ChecksumProperty: 1498 self._match(TokenType.EQ) 1499 1500 on = None 1501 if self._match(TokenType.ON): 1502 on = True 1503 elif self._match_text_seq("OFF"): 1504 on = False 1505 1506 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1507 1508 def _parse_cluster(self) -> exp.Cluster: 1509 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1510 1511 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1512 self._match_text_seq("BY") 1513 1514 self._match_l_paren() 1515 expressions = self._parse_csv(self._parse_column) 1516 self._match_r_paren() 1517 1518 if self._match_text_seq("SORTED", "BY"): 1519 self._match_l_paren() 1520 sorted_by = self._parse_csv(self._parse_ordered) 1521 self._match_r_paren() 1522 else: 1523 sorted_by = None 1524 1525 
self._match(TokenType.INTO) 1526 buckets = self._parse_number() 1527 self._match_text_seq("BUCKETS") 1528 1529 return self.expression( 1530 exp.ClusteredByProperty, 1531 expressions=expressions, 1532 sorted_by=sorted_by, 1533 buckets=buckets, 1534 ) 1535 1536 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1537 if not self._match_text_seq("GRANTS"): 1538 self._retreat(self._index - 1) 1539 return None 1540 1541 return self.expression(exp.CopyGrantsProperty) 1542 1543 def _parse_freespace(self) -> exp.FreespaceProperty: 1544 self._match(TokenType.EQ) 1545 return self.expression( 1546 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1547 ) 1548 1549 def _parse_mergeblockratio( 1550 self, no: bool = False, default: bool = False 1551 ) -> exp.MergeBlockRatioProperty: 1552 if self._match(TokenType.EQ): 1553 return self.expression( 1554 exp.MergeBlockRatioProperty, 1555 this=self._parse_number(), 1556 percent=self._match(TokenType.PERCENT), 1557 ) 1558 1559 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1560 1561 def _parse_datablocksize( 1562 self, 1563 default: t.Optional[bool] = None, 1564 minimum: t.Optional[bool] = None, 1565 maximum: t.Optional[bool] = None, 1566 ) -> exp.DataBlocksizeProperty: 1567 self._match(TokenType.EQ) 1568 size = self._parse_number() 1569 1570 units = None 1571 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1572 units = self._prev.text 1573 1574 return self.expression( 1575 exp.DataBlocksizeProperty, 1576 size=size, 1577 units=units, 1578 default=default, 1579 minimum=minimum, 1580 maximum=maximum, 1581 ) 1582 1583 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1584 self._match(TokenType.EQ) 1585 always = self._match_text_seq("ALWAYS") 1586 manual = self._match_text_seq("MANUAL") 1587 never = self._match_text_seq("NEVER") 1588 default = self._match_text_seq("DEFAULT") 1589 1590 autotemp = None 1591 if 
self._match_text_seq("AUTOTEMP"): 1592 autotemp = self._parse_schema() 1593 1594 return self.expression( 1595 exp.BlockCompressionProperty, 1596 always=always, 1597 manual=manual, 1598 never=never, 1599 default=default, 1600 autotemp=autotemp, 1601 ) 1602 1603 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1604 no = self._match_text_seq("NO") 1605 concurrent = self._match_text_seq("CONCURRENT") 1606 self._match_text_seq("ISOLATED", "LOADING") 1607 for_all = self._match_text_seq("FOR", "ALL") 1608 for_insert = self._match_text_seq("FOR", "INSERT") 1609 for_none = self._match_text_seq("FOR", "NONE") 1610 return self.expression( 1611 exp.IsolatedLoadingProperty, 1612 no=no, 1613 concurrent=concurrent, 1614 for_all=for_all, 1615 for_insert=for_insert, 1616 for_none=for_none, 1617 ) 1618 1619 def _parse_locking(self) -> exp.LockingProperty: 1620 if self._match(TokenType.TABLE): 1621 kind = "TABLE" 1622 elif self._match(TokenType.VIEW): 1623 kind = "VIEW" 1624 elif self._match(TokenType.ROW): 1625 kind = "ROW" 1626 elif self._match_text_seq("DATABASE"): 1627 kind = "DATABASE" 1628 else: 1629 kind = None 1630 1631 if kind in ("DATABASE", "TABLE", "VIEW"): 1632 this = self._parse_table_parts() 1633 else: 1634 this = None 1635 1636 if self._match(TokenType.FOR): 1637 for_or_in = "FOR" 1638 elif self._match(TokenType.IN): 1639 for_or_in = "IN" 1640 else: 1641 for_or_in = None 1642 1643 if self._match_text_seq("ACCESS"): 1644 lock_type = "ACCESS" 1645 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1646 lock_type = "EXCLUSIVE" 1647 elif self._match_text_seq("SHARE"): 1648 lock_type = "SHARE" 1649 elif self._match_text_seq("READ"): 1650 lock_type = "READ" 1651 elif self._match_text_seq("WRITE"): 1652 lock_type = "WRITE" 1653 elif self._match_text_seq("CHECKSUM"): 1654 lock_type = "CHECKSUM" 1655 else: 1656 lock_type = None 1657 1658 override = self._match_text_seq("OVERRIDE") 1659 1660 return self.expression( 1661 exp.LockingProperty, 1662 this=this, 1663 
kind=kind, 1664 for_or_in=for_or_in, 1665 lock_type=lock_type, 1666 override=override, 1667 ) 1668 1669 def _parse_partition_by(self) -> t.List[exp.Expression]: 1670 if self._match(TokenType.PARTITION_BY): 1671 return self._parse_csv(self._parse_conjunction) 1672 return [] 1673 1674 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1675 self._match(TokenType.EQ) 1676 return self.expression( 1677 exp.PartitionedByProperty, 1678 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1679 ) 1680 1681 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1682 if self._match_text_seq("AND", "STATISTICS"): 1683 statistics = True 1684 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1685 statistics = False 1686 else: 1687 statistics = None 1688 1689 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1690 1691 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1692 if self._match_text_seq("PRIMARY", "INDEX"): 1693 return exp.NoPrimaryIndexProperty() 1694 return None 1695 1696 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1697 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1698 return exp.OnCommitProperty() 1699 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1700 return exp.OnCommitProperty(delete=True) 1701 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1702 1703 def _parse_distkey(self) -> exp.DistKeyProperty: 1704 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1705 1706 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1707 table = self._parse_table(schema=True) 1708 1709 options = [] 1710 while self._match_texts(("INCLUDING", "EXCLUDING")): 1711 this = self._prev.text.upper() 1712 1713 id_var = self._parse_id_var() 1714 if not id_var: 1715 return None 1716 1717 options.append( 1718 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 
1719 ) 1720 1721 return self.expression(exp.LikeProperty, this=table, expressions=options) 1722 1723 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1724 return self.expression( 1725 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1726 ) 1727 1728 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1729 self._match(TokenType.EQ) 1730 return self.expression( 1731 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1732 ) 1733 1734 def _parse_returns(self) -> exp.ReturnsProperty: 1735 value: t.Optional[exp.Expression] 1736 is_table = self._match(TokenType.TABLE) 1737 1738 if is_table: 1739 if self._match(TokenType.LT): 1740 value = self.expression( 1741 exp.Schema, 1742 this="TABLE", 1743 expressions=self._parse_csv(self._parse_struct_types), 1744 ) 1745 if not self._match(TokenType.GT): 1746 self.raise_error("Expecting >") 1747 else: 1748 value = self._parse_schema(exp.var("TABLE")) 1749 else: 1750 value = self._parse_types() 1751 1752 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1753 1754 def _parse_describe(self) -> exp.Describe: 1755 kind = self._match_set(self.CREATABLES) and self._prev.text 1756 this = self._parse_table(schema=True) 1757 properties = self._parse_properties() 1758 expressions = properties.expressions if properties else None 1759 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1760 1761 def _parse_insert(self) -> exp.Insert: 1762 comments = ensure_list(self._prev_comments) 1763 overwrite = self._match(TokenType.OVERWRITE) 1764 ignore = self._match(TokenType.IGNORE) 1765 local = self._match_text_seq("LOCAL") 1766 alternative = None 1767 1768 if self._match_text_seq("DIRECTORY"): 1769 this: t.Optional[exp.Expression] = self.expression( 1770 exp.Directory, 1771 this=self._parse_var_or_string(), 1772 local=local, 1773 row_format=self._parse_row_format(match_row=True), 1774 ) 1775 
else: 1776 if self._match(TokenType.OR): 1777 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1778 1779 self._match(TokenType.INTO) 1780 comments += ensure_list(self._prev_comments) 1781 self._match(TokenType.TABLE) 1782 this = self._parse_table(schema=True) 1783 1784 returning = self._parse_returning() 1785 1786 return self.expression( 1787 exp.Insert, 1788 comments=comments, 1789 this=this, 1790 by_name=self._match_text_seq("BY", "NAME"), 1791 exists=self._parse_exists(), 1792 partition=self._parse_partition(), 1793 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1794 and self._parse_conjunction(), 1795 expression=self._parse_ddl_select(), 1796 conflict=self._parse_on_conflict(), 1797 returning=returning or self._parse_returning(), 1798 overwrite=overwrite, 1799 alternative=alternative, 1800 ignore=ignore, 1801 ) 1802 1803 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1804 conflict = self._match_text_seq("ON", "CONFLICT") 1805 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1806 1807 if not conflict and not duplicate: 1808 return None 1809 1810 nothing = None 1811 expressions = None 1812 key = None 1813 constraint = None 1814 1815 if conflict: 1816 if self._match_text_seq("ON", "CONSTRAINT"): 1817 constraint = self._parse_id_var() 1818 else: 1819 key = self._parse_csv(self._parse_value) 1820 1821 self._match_text_seq("DO") 1822 if self._match_text_seq("NOTHING"): 1823 nothing = True 1824 else: 1825 self._match(TokenType.UPDATE) 1826 self._match(TokenType.SET) 1827 expressions = self._parse_csv(self._parse_equality) 1828 1829 return self.expression( 1830 exp.OnConflict, 1831 duplicate=duplicate, 1832 expressions=expressions, 1833 nothing=nothing, 1834 key=key, 1835 constraint=constraint, 1836 ) 1837 1838 def _parse_returning(self) -> t.Optional[exp.Returning]: 1839 if not self._match(TokenType.RETURNING): 1840 return None 1841 return self.expression( 1842 exp.Returning, 1843 
expressions=self._parse_csv(self._parse_expression), 1844 into=self._match(TokenType.INTO) and self._parse_table_part(), 1845 ) 1846 1847 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1848 if not self._match(TokenType.FORMAT): 1849 return None 1850 return self._parse_row_format() 1851 1852 def _parse_row_format( 1853 self, match_row: bool = False 1854 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1855 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1856 return None 1857 1858 if self._match_text_seq("SERDE"): 1859 this = self._parse_string() 1860 1861 serde_properties = None 1862 if self._match(TokenType.SERDE_PROPERTIES): 1863 serde_properties = self.expression( 1864 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1865 ) 1866 1867 return self.expression( 1868 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1869 ) 1870 1871 self._match_text_seq("DELIMITED") 1872 1873 kwargs = {} 1874 1875 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1876 kwargs["fields"] = self._parse_string() 1877 if self._match_text_seq("ESCAPED", "BY"): 1878 kwargs["escaped"] = self._parse_string() 1879 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1880 kwargs["collection_items"] = self._parse_string() 1881 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1882 kwargs["map_keys"] = self._parse_string() 1883 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1884 kwargs["lines"] = self._parse_string() 1885 if self._match_text_seq("NULL", "DEFINED", "AS"): 1886 kwargs["null"] = self._parse_string() 1887 1888 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1889 1890 def _parse_load(self) -> exp.LoadData | exp.Command: 1891 if self._match_text_seq("DATA"): 1892 local = self._match_text_seq("LOCAL") 1893 self._match_text_seq("INPATH") 1894 inpath = 
self._parse_string() 1895 overwrite = self._match(TokenType.OVERWRITE) 1896 self._match_pair(TokenType.INTO, TokenType.TABLE) 1897 1898 return self.expression( 1899 exp.LoadData, 1900 this=self._parse_table(schema=True), 1901 local=local, 1902 overwrite=overwrite, 1903 inpath=inpath, 1904 partition=self._parse_partition(), 1905 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1906 serde=self._match_text_seq("SERDE") and self._parse_string(), 1907 ) 1908 return self._parse_as_command(self._prev) 1909 1910 def _parse_delete(self) -> exp.Delete: 1911 # This handles MySQL's "Multiple-Table Syntax" 1912 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1913 tables = None 1914 comments = self._prev_comments 1915 if not self._match(TokenType.FROM, advance=False): 1916 tables = self._parse_csv(self._parse_table) or None 1917 1918 returning = self._parse_returning() 1919 1920 return self.expression( 1921 exp.Delete, 1922 comments=comments, 1923 tables=tables, 1924 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1925 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1926 where=self._parse_where(), 1927 returning=returning or self._parse_returning(), 1928 limit=self._parse_limit(), 1929 ) 1930 1931 def _parse_update(self) -> exp.Update: 1932 comments = self._prev_comments 1933 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1934 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1935 returning = self._parse_returning() 1936 return self.expression( 1937 exp.Update, 1938 comments=comments, 1939 **{ # type: ignore 1940 "this": this, 1941 "expressions": expressions, 1942 "from": self._parse_from(joins=True), 1943 "where": self._parse_where(), 1944 "returning": returning or self._parse_returning(), 1945 "order": self._parse_order(), 1946 "limit": self._parse_limit(), 1947 }, 1948 ) 1949 1950 def _parse_uncache(self) -> exp.Uncache: 1951 if not 
self._match(TokenType.TABLE): 1952 self.raise_error("Expecting TABLE after UNCACHE") 1953 1954 return self.expression( 1955 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1956 ) 1957 1958 def _parse_cache(self) -> exp.Cache: 1959 lazy = self._match_text_seq("LAZY") 1960 self._match(TokenType.TABLE) 1961 table = self._parse_table(schema=True) 1962 1963 options = [] 1964 if self._match_text_seq("OPTIONS"): 1965 self._match_l_paren() 1966 k = self._parse_string() 1967 self._match(TokenType.EQ) 1968 v = self._parse_string() 1969 options = [k, v] 1970 self._match_r_paren() 1971 1972 self._match(TokenType.ALIAS) 1973 return self.expression( 1974 exp.Cache, 1975 this=table, 1976 lazy=lazy, 1977 options=options, 1978 expression=self._parse_select(nested=True), 1979 ) 1980 1981 def _parse_partition(self) -> t.Optional[exp.Partition]: 1982 if not self._match(TokenType.PARTITION): 1983 return None 1984 1985 return self.expression( 1986 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1987 ) 1988 1989 def _parse_value(self) -> exp.Tuple: 1990 if self._match(TokenType.L_PAREN): 1991 expressions = self._parse_csv(self._parse_conjunction) 1992 self._match_r_paren() 1993 return self.expression(exp.Tuple, expressions=expressions) 1994 1995 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list."""
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, SELECT/VALUES/parenthesized forms.

        Args:
            nested: whether this call is parsing a nested (parenthesized) query.
            table: whether a bare table reference is acceptable inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery's SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] cte [, cte ...] clause, or return None."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one `alias AS (statement)` entry of a WITH clause."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] alias [(col, ...)]`; returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesized list turned out to be empty.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an `exp.Subquery`, optionally parsing pivots and an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and trailing modifiers (WHERE, GROUP BY, LIMIT, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        # `LIMIT x, y` style carries the offset inside the limit node;
                        # hoist it out into a proper OFFSET arg.
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment block `/*+ ... */`, or return None."""
        if self._match(TokenType.HINT):
            hints = []
            # Hints are comma-separated lists of function-like entries; collect until exhausted.
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse a SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] target clause."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause, or return None if the FROM token is absent."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE (...) clause (row pattern matching)."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text: scan tokens while balancing
            # parentheses rather than parsing the regex-like pattern grammar.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY, or return None."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL over an UNNEST, a function call, or a dotted name.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join clause (including comma joins and APPLY forms), or return None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The prefix tokens weren't actually a join - backtrack.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins whose ON/USING applies to the outer join; backtrack
            # if no ON/USING follows the nested joins.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; if `index` is given, parse the trailing ON-table form."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) hints or MySQL index hints on a table reference."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single component of a (possibly dotted) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse `[catalog.][db.]table` (plus extra dotted parts) into an `exp.Table`."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-valued relation: lateral, unnest, VALUES, subquery or table name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Dialect flag: some dialects put TABLESAMPLE before the alias, others after.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal/versioned table clause (FOR SYSTEM_TIME-style), or return None."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) relation with optional ordinality, alias and offset."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects (e.g. BigQuery) the alias names the produced
            # column, not the relation - move it accordingly.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES relation, possibly wrapped as `(VALUES ...) alias`."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        # The alias may appear inside or outside the closing paren; try both.
        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse TABLESAMPLE / USING SAMPLE with its bucket, percent, rows or size arguments."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): duplicated assignment — harmless, but should be a single
            # `bucket_denominator = self._parse_number()`.
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse zero or more consecutive join clauses."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement form."""
        def _parse_on() -> t.Optional[exp.Expression]:
            # Each ON entry is an expression, optionally restricted with IN (...).
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT (...) clause, or return None (with backtracking)."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Pre-compute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or return None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; the plain forms store column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a GROUPING SETS (...) list, or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle's START WITH ... CONNECT BY clause."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a function inside CONNECT BY, so register a temporary parser.
        # NOTE(review): this mutates what looks like a class-level dict and is not
        # exception-safe (the pop is skipped if parsing raises) — confirm intent.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a SORT BY / CLUSTER BY / DISTRIBUTE BY style clause keyed by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term with ASC/DESC and NULLS FIRST/LAST semantics."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's implicit null-ordering when none was written explicitly.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP / FETCH; returns `this` unchanged if none is present."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL's `LIMIT offset, count`: the first term is the offset.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait semantics: True = NOWAIT, False = SKIP LOCKED, expression = WAIT n.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; returns `this` unchanged if none follow."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is written explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction followed by an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains over equality expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators over comparison expressions."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ...), IS, ISNULL/NOTNULL and NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this
            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # A leading NOT (e.g. `x NOT BETWEEN ...`) wraps the parsed range expression
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of `<this> IS ...` (the IS token is already consumed)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        # IS [NOT] DISTINCT FROM maps to the null-safe (in)equality nodes
        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all - rewind to before the IS token
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of `<this> IN ...`: UNNEST, a subquery/list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under "query"; otherwise it's a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `<this> BETWEEN low AND high` (BETWEEN already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node if an ESCAPE '<char>' clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing toward INTERVAL '<n>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise operators, `??`, and `<<` / `>>`."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive-level operators (per self.TERM)
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Multiplicative-level operators (per self.FACTOR)
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse either an INTERVAL, a typed literal / cast (e.g. DATE 'x'), or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # A type keyword followed by a literal (e.g. DATE '2020-01-01') becomes
                # either a dialect-specific literal node or a Cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal - reparse as a column reference
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 10 in VARCHAR(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested / parameterized) data type, or None on failure.

        Args:
            check_func: if True, bail out when the "type" is likely a function call
                (a parenthesized form not followed by a string literal).
            schema: propagated into nested type parsing (column-def context).
            allow_identifiers: allow a plain identifier to be re-tokenized as a type.
        """
        index = self._index

        # Teradata-style SYSUDTLIB. prefix before the type name
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text to see if it's really a type keyword
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters: struct fields, nested types, enum values, or sizes
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still be a function call with the same name as a type
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE variants of time/timestamp types
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            # INTERVAL column types: YEAR TO MONTH / DAY TO SECOND spans, a unit, or bare INTERVAL
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a function call: a following string literal
            # (e.g. DECIMAL(10) '1.2') means it really was a type
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one `name[: ]type` field inside a STRUCT type."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted / bracketed suffix operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::, dots, brackets, ...) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast operator
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dot after a column shifts its parts: name becomes table, table becomes db, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, `.N` number, or a parenthesized expression/query."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT('a', 'b'))
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. `.5`
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier, in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping to use (defaults to self.FUNCTIONS).
            anonymous: if True, always produce an exp.Anonymous node instead of a
                known function expression.
            optional_parens: allow parenless functions such as CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the original (non-normalized) name for generation
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as `<kind>.<name>`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. x -> x + 1) or fall back to a regular function argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list - rewind
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found - reparse from the start as a plain expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`."""
        index = self._index

        if not self.errors:
            # If what follows is actually a subquery, leave it for the caller
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Untyped `name AS expr` form defines a computed column
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        # Accumulate any further column constraints (NOT NULL, DEFAULT, ...)
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Neither a type nor constraints: this was just an identifier
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY (...)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options: START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr) - a computed expression rather than an identity
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] n column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema-level constraint; unnamed ones go through _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint without a CONSTRAINT <name> prefix, using CONSTRAINT_PARSERS."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional wrapped column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE, UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> clause plus its key-constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or, with a column list, a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: an open-ended slice like x[:n]
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices to the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to handle chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a Slice when a trailing colon is present (x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form (CASE x WHEN ...)
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in either functional form IF(a, b, c) or block form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression - rewind to before the IF token
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] (NEXT already consumed)."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma instead of AS: cast-to-string-type variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name - treat as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type becomes StrToDate / StrToTime
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, coalescing NULLs to '' for dialects that require it."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 4047 for arg in args 4048 if arg 4049 ] 4050 4051 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4052 # we find such a call we replace it with its argument. 4053 if len(args) == 1: 4054 return args[0] 4055 4056 return self.expression( 4057 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4058 ) 4059 4060 def _parse_string_agg(self) -> exp.Expression: 4061 if self._match(TokenType.DISTINCT): 4062 args: t.List[t.Optional[exp.Expression]] = [ 4063 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4064 ] 4065 if self._match(TokenType.COMMA): 4066 args.extend(self._parse_csv(self._parse_conjunction)) 4067 else: 4068 args = self._parse_csv(self._parse_conjunction) # type: ignore 4069 4070 index = self._index 4071 if not self._match(TokenType.R_PAREN) and args: 4072 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4073 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4074 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4075 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4076 4077 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4078 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4079 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4080 if not self._match_text_seq("WITHIN", "GROUP"): 4081 self._retreat(index) 4082 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4083 4084 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4085 order = self._parse_order(this=seq_get(args, 0)) 4086 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4087 4088 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4089 this = self._parse_bitwise() 4090 4091 if self._match(TokenType.USING): 4092 to: t.Optional[exp.Expression] = self.expression( 4093 exp.CharacterSet, this=self._parse_var() 4094 ) 4095 elif self._match(TokenType.COMMA): 4096 to = self._parse_types() 4097 else: 4098 to = None 4099 4100 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4101 4102 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4103 """ 4104 There are generally two variants of the DECODE function: 4105 4106 - DECODE(bin, charset) 4107 - DECODE(expression, search, result [, search, result] ... [, default]) 4108 4109 The second variant will always be parsed into a CASE expression. Note that NULL 4110 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4111 instead of relying on pattern matching. 
4112 """ 4113 args = self._parse_csv(self._parse_conjunction) 4114 4115 if len(args) < 3: 4116 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4117 4118 expression, *expressions = args 4119 if not expression: 4120 return None 4121 4122 ifs = [] 4123 for search, result in zip(expressions[::2], expressions[1::2]): 4124 if not search or not result: 4125 return None 4126 4127 if isinstance(search, exp.Literal): 4128 ifs.append( 4129 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4130 ) 4131 elif isinstance(search, exp.Null): 4132 ifs.append( 4133 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4134 ) 4135 else: 4136 cond = exp.or_( 4137 exp.EQ(this=expression.copy(), expression=search), 4138 exp.and_( 4139 exp.Is(this=expression.copy(), expression=exp.Null()), 4140 exp.Is(this=search.copy(), expression=exp.Null()), 4141 copy=False, 4142 ), 4143 copy=False, 4144 ) 4145 ifs.append(exp.If(this=cond, true=result)) 4146 4147 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4148 4149 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4150 self._match_text_seq("KEY") 4151 key = self._parse_column() 4152 self._match_set((TokenType.COLON, TokenType.COMMA)) 4153 self._match_text_seq("VALUE") 4154 value = self._parse_bitwise() 4155 4156 if not key and not value: 4157 return None 4158 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4159 4160 def _parse_json_object(self) -> exp.JSONObject: 4161 star = self._parse_star() 4162 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4163 4164 null_handling = None 4165 if self._match_text_seq("NULL", "ON", "NULL"): 4166 null_handling = "NULL ON NULL" 4167 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4168 null_handling = "ABSENT ON NULL" 4169 4170 unique_keys = None 4171 if self._match_text_seq("WITH", "UNIQUE"): 4172 unique_keys = True 
4173 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4174 unique_keys = False 4175 4176 self._match_text_seq("KEYS") 4177 4178 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4179 format_json = self._match_text_seq("FORMAT", "JSON") 4180 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4181 4182 return self.expression( 4183 exp.JSONObject, 4184 expressions=expressions, 4185 null_handling=null_handling, 4186 unique_keys=unique_keys, 4187 return_type=return_type, 4188 format_json=format_json, 4189 encoding=encoding, 4190 ) 4191 4192 def _parse_logarithm(self) -> exp.Func: 4193 # Default argument order is base, expression 4194 args = self._parse_csv(self._parse_range) 4195 4196 if len(args) > 1: 4197 if not self.LOG_BASE_FIRST: 4198 args.reverse() 4199 return exp.Log.from_arg_list(args) 4200 4201 return self.expression( 4202 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4203 ) 4204 4205 def _parse_match_against(self) -> exp.MatchAgainst: 4206 expressions = self._parse_csv(self._parse_column) 4207 4208 self._match_text_seq(")", "AGAINST", "(") 4209 4210 this = self._parse_string() 4211 4212 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4213 modifier = "IN NATURAL LANGUAGE MODE" 4214 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4215 modifier = f"{modifier} WITH QUERY EXPANSION" 4216 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4217 modifier = "IN BOOLEAN MODE" 4218 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4219 modifier = "WITH QUERY EXPANSION" 4220 else: 4221 modifier = None 4222 4223 return self.expression( 4224 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4225 ) 4226 4227 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4228 def _parse_open_json(self) -> exp.OpenJSON: 4229 this = self._parse_bitwise() 4230 path = self._match(TokenType.COMMA) and self._parse_string() 4231 4232 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4233 this = self._parse_field(any_token=True) 4234 kind = self._parse_types() 4235 path = self._parse_string() 4236 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4237 4238 return self.expression( 4239 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4240 ) 4241 4242 expressions = None 4243 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4244 self._match_l_paren() 4245 expressions = self._parse_csv(_parse_open_json_column_def) 4246 4247 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4248 4249 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4250 args = self._parse_csv(self._parse_bitwise) 4251 4252 if self._match(TokenType.IN): 4253 return self.expression( 4254 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4255 ) 4256 4257 if haystack_first: 4258 haystack = seq_get(args, 0) 4259 needle = seq_get(args, 1) 4260 else: 4261 needle = seq_get(args, 0) 4262 haystack = seq_get(args, 1) 4263 4264 return self.expression( 4265 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4266 ) 4267 4268 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4269 args = self._parse_csv(self._parse_table) 4270 return exp.JoinHint(this=func_name.upper(), expressions=args) 4271 4272 def _parse_substring(self) -> exp.Substring: 4273 # Postgres supports the form: substring(string [from int] [for int]) 4274 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4275 4276 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4277 4278 if self._match(TokenType.FROM): 4279 args.append(self._parse_bitwise()) 4280 if self._match(TokenType.FOR): 4281 args.append(self._parse_bitwise()) 4282 4283 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4284 4285 def _parse_trim(self) -> exp.Trim: 4286 # 
https://www.w3resource.com/sql/character-functions/trim.php 4287 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4288 4289 position = None 4290 collation = None 4291 4292 if self._match_texts(self.TRIM_TYPES): 4293 position = self._prev.text.upper() 4294 4295 expression = self._parse_bitwise() 4296 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4297 this = self._parse_bitwise() 4298 else: 4299 this = expression 4300 expression = None 4301 4302 if self._match(TokenType.COLLATE): 4303 collation = self._parse_bitwise() 4304 4305 return self.expression( 4306 exp.Trim, this=this, position=position, expression=expression, collation=collation 4307 ) 4308 4309 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4310 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4311 4312 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4313 return self._parse_window(self._parse_id_var(), alias=True) 4314 4315 def _parse_respect_or_ignore_nulls( 4316 self, this: t.Optional[exp.Expression] 4317 ) -> t.Optional[exp.Expression]: 4318 if self._match_text_seq("IGNORE", "NULLS"): 4319 return self.expression(exp.IgnoreNulls, this=this) 4320 if self._match_text_seq("RESPECT", "NULLS"): 4321 return self.expression(exp.RespectNulls, this=this) 4322 return this 4323 4324 def _parse_window( 4325 self, this: t.Optional[exp.Expression], alias: bool = False 4326 ) -> t.Optional[exp.Expression]: 4327 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4328 self._match(TokenType.WHERE) 4329 this = self.expression( 4330 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4331 ) 4332 self._match_r_paren() 4333 4334 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword -> nothing window-like follows; return as-is
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window_name> form (reference to a named window)
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: [ROWS|RANGE] [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of an OVER clause."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED / CURRENT ROW / <expr>, plus an
        optional side keyword (PRECEDING/FOLLOWING per WINDOW_SIDES)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (or parenthesized alias list) after `this`.

        Args:
            explicit: when True, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # e.g. `t AS (a, b)` -- multiple aliases
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an identifier-like token.

        Args:
            any_token: accept any non-reserved token as an identifier.
            tokens: token types to accept instead of the default ID_VAR_TOKENS.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier (or None)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens if requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR or a string literal, whichever matches first."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces (`{x}`)."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the dialect's placeholder parsers; rewind if none produce a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT-star EXCEPT column list (wrapped or bare)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a SELECT-star REPLACE expression list (wrapped or bare)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments on the separator to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from `expressions`
        (token type -> expression class) over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if '(' is missing
        unless `optional` is True (then the parens may be absent)."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated expression list."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a (possibly aliased)
        expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<mode>, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens joined by spaces (e.g. READ ONLY)
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] <sp>]
        [AND [NO] CHAIN]; the leading keyword was consumed by the caller."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <def>
        [FIRST | AFTER <col>] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] <col>, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP PARTITION (...)[, PARTITION (...)]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action; the
        introducing token was consumed by the caller (kind = self._prev)."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            # Dialect allows ADD without a per-column COLUMN keyword
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT
        <expr> | [SET DATA] TYPE <type> [COLLATE ...] [USING ...]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP actions (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unrecognized forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable node if all tokens were consumed
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] <target> USING <source> ON <cond>
        WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND <cond>] THEN <action> ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None/False otherwise
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Dispatch SHOW statements through SHOW_PARSERS, else fall back to Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment `<name> {=|TO} <value>` into a SetItem.

        Args:
            kind: optional scope qualifier (e.g. GLOBAL/SESSION), which may
                instead introduce a SET TRANSACTION statement.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
self._match_text_seq("TRANSACTION") 4878 characteristics = self._parse_csv( 4879 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4880 ) 4881 return self.expression( 4882 exp.SetItem, 4883 expressions=characteristics, 4884 kind="TRANSACTION", 4885 **{"global": global_}, # type: ignore 4886 ) 4887 4888 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4889 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4890 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4891 4892 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4893 index = self._index 4894 set_ = self.expression( 4895 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4896 ) 4897 4898 if self._curr: 4899 self._retreat(index) 4900 return self._parse_as_command(self._prev) 4901 4902 return set_ 4903 4904 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4905 for option in options: 4906 if self._match_text_seq(*option.split(" ")): 4907 return exp.var(option) 4908 return None 4909 4910 def _parse_as_command(self, start: Token) -> exp.Command: 4911 while self._curr: 4912 self._advance() 4913 text = self._find_sql(start, self._prev) 4914 size = len(start.text) 4915 return exp.Command(this=text[:size], expression=text[size:]) 4916 4917 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4918 settings = [] 4919 4920 self._match_l_paren() 4921 kind = self._parse_id_var() 4922 4923 if self._match(TokenType.L_PAREN): 4924 while True: 4925 key = self._parse_id_var() 4926 value = self._parse_primary() 4927 4928 if not key and value is None: 4929 break 4930 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4931 self._match(TokenType.R_PAREN) 4932 4933 self._match_r_paren() 4934 4935 return self.expression( 4936 exp.DictProperty, 4937 this=this, 4938 kind=kind.this if kind else None, 4939 settings=settings, 4940 ) 4941 
4942 def _parse_dict_range(self, this: str) -> exp.DictRange: 4943 self._match_l_paren() 4944 has_min = self._match_text_seq("MIN") 4945 if has_min: 4946 min = self._parse_var() or self._parse_primary() 4947 self._match_text_seq("MAX") 4948 max = self._parse_var() or self._parse_primary() 4949 else: 4950 max = self._parse_var() or self._parse_primary() 4951 min = exp.Literal.number(0) 4952 self._match_r_paren() 4953 return self.expression(exp.DictRange, this=this, min=min, max=max) 4954 4955 def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension: 4956 expression = self._parse_column() 4957 self._match(TokenType.IN) 4958 iterator = self._parse_column() 4959 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 4960 return self.expression( 4961 exp.Comprehension, 4962 this=this, 4963 expression=expression, 4964 iterator=iterator, 4965 condition=condition, 4966 ) 4967 4968 def _find_parser( 4969 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4970 ) -> t.Optional[t.Callable]: 4971 if not self._curr: 4972 return None 4973 4974 index = self._index 4975 this = [] 4976 while True: 4977 # The current token might be multiple words 4978 curr = self._curr.text.upper() 4979 key = curr.split(" ") 4980 this.append(curr) 4981 4982 self._advance() 4983 result, trie = in_trie(trie, key) 4984 if result == TrieResult.FAILED: 4985 break 4986 4987 if result == TrieResult.EXISTS: 4988 subparser = parsers[" ".join(this)] 4989 return subparser 4990 4991 self._retreat(index) 4992 return None 4993 4994 def _match(self, token_type, advance=True, expression=None): 4995 if not self._curr: 4996 return None 4997 4998 if self._curr.token_type == token_type: 4999 if advance: 5000 self._advance() 5001 self._add_comments(expression) 5002 return True 5003 5004 return None 5005 5006 def _match_set(self, types, advance=True): 5007 if not self._curr: 5008 return None 5009 5010 if self._curr.token_type in types: 5011 if advance: 5012 self._advance() 5013 
return True 5014 5015 return None 5016 5017 def _match_pair(self, token_type_a, token_type_b, advance=True): 5018 if not self._curr or not self._next: 5019 return None 5020 5021 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5022 if advance: 5023 self._advance(2) 5024 return True 5025 5026 return None 5027 5028 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5029 if not self._match(TokenType.L_PAREN, expression=expression): 5030 self.raise_error("Expecting (") 5031 5032 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5033 if not self._match(TokenType.R_PAREN, expression=expression): 5034 self.raise_error("Expecting )") 5035 5036 def _match_texts(self, texts, advance=True): 5037 if self._curr and self._curr.text.upper() in texts: 5038 if advance: 5039 self._advance() 5040 return True 5041 return False 5042 5043 def _match_text_seq(self, *texts, advance=True): 5044 index = self._index 5045 for text in texts: 5046 if self._curr and self._curr.text.upper() == text: 5047 self._advance() 5048 else: 5049 self._retreat(index) 5050 return False 5051 5052 if not advance: 5053 self._retreat(index) 5054 5055 return True 5056 5057 @t.overload 5058 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5059 ... 5060 5061 @t.overload 5062 def _replace_columns_with_dots( 5063 self, this: t.Optional[exp.Expression] 5064 ) -> t.Optional[exp.Expression]: 5065 ... 
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite qualified Column nodes into Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes Dot(table, name); an unqualified one
            # collapses to its bare identifier.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters with plain identifiers/dots.

        Args:
            node: The parsed lambda body (may be None).
            lambda_variables: The names of the lambda's parameters.

        Returns:
            The rewritten node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb any enclosing Dot chain and replace its topmost Dot;
                # the while/else branch runs only when no Dot ancestor exists.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument produces a StarMap; otherwise the arguments are
    consumed pairwise as key, value, key, value, ...
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    position = 0
    while position < len(args):
        keys.append(args[position])
        values.append(args[position + 1])
        position += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # SQL function name -> builder that turns an argument list into an expression.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate — looks deliberate
    # (no dedicated node in use here) but worth confirming.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    # Every token that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that can wrap a subquery, e.g. ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Objects creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens valid as a table alias (join-related keywords removed).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by '(' and parsed as a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator token -> expression class, grouped by precedence level.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `->` for lambda bodies, `=>` for keyword arguments.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators applied to a column, e.g. ::cast and JSON extraction arrows.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> parser entry point; used by parse_into.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Leading statement token -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal token -> builder; each receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Range/predicate operators that bind an already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # CREATE/DDL property keyword -> property parser.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column constraint keyword -> constraint parser.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like keywords parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with non-standard argument syntax that need bespoke parsing.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier token -> (modifier arg name, parser) pairs.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    # Dialect feature flags (overridden by dialect-specific subclasses).
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split `raw_tokens` on semicolons and run `parse_method` over each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon ends the current statement; a trailing one is dropped.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.
1050 1051 Returns: 1052 The target expression. 1053 """ 1054 instance = exp_class(**kwargs) 1055 instance.add_comments(comments) if comments else self._add_comments(instance) 1056 return self.validate_expression(instance) 1057 1058 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1059 if expression and self._prev_comments: 1060 expression.add_comments(self._prev_comments) 1061 self._prev_comments = None 1062 1063 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1064 """ 1065 Validates an Expression, making sure that all its mandatory arguments are set. 1066 1067 Args: 1068 expression: The expression to validate. 1069 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1070 1071 Returns: 1072 The validated expression. 1073 """ 1074 if self.error_level != ErrorLevel.IGNORE: 1075 for error_message in expression.error_messages(args): 1076 self.raise_error(error_message) 1077 1078 return expression 1079 1080 def _find_sql(self, start: Token, end: Token) -> str: 1081 return self.sql[start.start : end.end + 1] 1082 1083 def _advance(self, times: int = 1) -> None: 1084 self._index += times 1085 self._curr = seq_get(self._tokens, self._index) 1086 self._next = seq_get(self._tokens, self._index + 1) 1087 1088 if self._index > 0: 1089 self._prev = self._tokens[self._index - 1] 1090 self._prev_comments = self._prev.comments 1091 else: 1092 self._prev = None 1093 self._prev_comments = None 1094 1095 def _retreat(self, index: int) -> None: 1096 if index != self._index: 1097 self._advance(index - self._index) 1098 1099 def _parse_command(self) -> exp.Command: 1100 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1101 1102 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1103 start = self._prev 1104 exists = self._parse_exists() if allow_exists else None 1105 1106 self._match(TokenType.ON) 1107 1108 kind = 
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a table reference into a ToTableProperty (e.g. ClickHouse `TO <table>`)."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: parses one SQL statement from the current cursor."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a recognized statement keyword: try a bare expression, otherwise a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a raw command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches `IF [NOT] EXISTS`, returning a truthy value only on a full match."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement; falls back to a raw command when unparseable."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # e.g. CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the create token.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause locations.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the object name (Teradata)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1351 1352 kwargs = { 1353 "no": self._match_text_seq("NO"), 1354 "dual": self._match_text_seq("DUAL"), 1355 "before": self._match_text_seq("BEFORE"), 1356 "default": self._match_text_seq("DEFAULT"), 1357 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1358 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1359 "after": self._match_text_seq("AFTER"), 1360 "minimum": self._match_texts(("MIN", "MINIMUM")), 1361 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1362 } 1363 1364 if self._match_texts(self.PROPERTY_PARSERS): 1365 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1366 try: 1367 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1368 except TypeError: 1369 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1370 1371 return None 1372 1373 def _parse_property(self) -> t.Optional[exp.Expression]: 1374 if self._match_texts(self.PROPERTY_PARSERS): 1375 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1376 1377 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1378 return self._parse_character_set(default=True) 1379 1380 if self._match_text_seq("COMPOUND", "SORTKEY"): 1381 return self._parse_sortkey(compound=True) 1382 1383 if self._match_text_seq("SQL", "SECURITY"): 1384 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1385 1386 assignment = self._match_pair( 1387 TokenType.VAR, TokenType.EQ, advance=False 1388 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1389 1390 if assignment: 1391 key = self._parse_var_or_string() 1392 self._match(TokenType.EQ) 1393 return self.expression( 1394 exp.Property, 1395 this=key, 1396 value=self._parse_column() or self._parse_var(any_token=True), 1397 ) 1398 1399 return None 1400 1401 def _parse_stored(self) -> exp.FileFormatProperty: 1402 self._match(TokenType.ALIAS) 1403 1404 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1405 output_format = 
self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1406 1407 return self.expression( 1408 exp.FileFormatProperty, 1409 this=self.expression( 1410 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1411 ) 1412 if input_format or output_format 1413 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1414 ) 1415 1416 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1417 self._match(TokenType.EQ) 1418 self._match(TokenType.ALIAS) 1419 return self.expression(exp_class, this=self._parse_field()) 1420 1421 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1422 properties = [] 1423 while True: 1424 if before: 1425 prop = self._parse_property_before() 1426 else: 1427 prop = self._parse_property() 1428 1429 if not prop: 1430 break 1431 for p in ensure_list(prop): 1432 properties.append(p) 1433 1434 if properties: 1435 return self.expression(exp.Properties, expressions=properties) 1436 1437 return None 1438 1439 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1440 return self.expression( 1441 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1442 ) 1443 1444 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1445 if self._index >= 2: 1446 pre_volatile_token = self._tokens[self._index - 2] 1447 else: 1448 pre_volatile_token = None 1449 1450 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1451 return exp.VolatileProperty() 1452 1453 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1454 1455 def _parse_with_property( 1456 self, 1457 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1458 if self._match(TokenType.L_PAREN, advance=False): 1459 return self._parse_wrapped_csv(self._parse_property) 1460 1461 if self._match_text_seq("JOURNAL"): 1462 return self._parse_withjournaltable() 1463 1464 
if self._match_text_seq("DATA"): 1465 return self._parse_withdata(no=False) 1466 elif self._match_text_seq("NO", "DATA"): 1467 return self._parse_withdata(no=True) 1468 1469 if not self._next: 1470 return None 1471 1472 return self._parse_withisolatedloading() 1473 1474 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1475 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1476 self._match(TokenType.EQ) 1477 1478 user = self._parse_id_var() 1479 self._match(TokenType.PARAMETER) 1480 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1481 1482 if not user or not host: 1483 return None 1484 1485 return exp.DefinerProperty(this=f"{user}@{host}") 1486 1487 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1488 self._match(TokenType.TABLE) 1489 self._match(TokenType.EQ) 1490 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1491 1492 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1493 return self.expression(exp.LogProperty, no=no) 1494 1495 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1496 return self.expression(exp.JournalProperty, **kwargs) 1497 1498 def _parse_checksum(self) -> exp.ChecksumProperty: 1499 self._match(TokenType.EQ) 1500 1501 on = None 1502 if self._match(TokenType.ON): 1503 on = True 1504 elif self._match_text_seq("OFF"): 1505 on = False 1506 1507 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1508 1509 def _parse_cluster(self) -> exp.Cluster: 1510 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1511 1512 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1513 self._match_text_seq("BY") 1514 1515 self._match_l_paren() 1516 expressions = self._parse_csv(self._parse_column) 1517 self._match_r_paren() 1518 1519 if self._match_text_seq("SORTED", "BY"): 1520 self._match_l_paren() 1521 sorted_by = self._parse_csv(self._parse_ordered) 1522 
self._match_r_paren() 1523 else: 1524 sorted_by = None 1525 1526 self._match(TokenType.INTO) 1527 buckets = self._parse_number() 1528 self._match_text_seq("BUCKETS") 1529 1530 return self.expression( 1531 exp.ClusteredByProperty, 1532 expressions=expressions, 1533 sorted_by=sorted_by, 1534 buckets=buckets, 1535 ) 1536 1537 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1538 if not self._match_text_seq("GRANTS"): 1539 self._retreat(self._index - 1) 1540 return None 1541 1542 return self.expression(exp.CopyGrantsProperty) 1543 1544 def _parse_freespace(self) -> exp.FreespaceProperty: 1545 self._match(TokenType.EQ) 1546 return self.expression( 1547 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1548 ) 1549 1550 def _parse_mergeblockratio( 1551 self, no: bool = False, default: bool = False 1552 ) -> exp.MergeBlockRatioProperty: 1553 if self._match(TokenType.EQ): 1554 return self.expression( 1555 exp.MergeBlockRatioProperty, 1556 this=self._parse_number(), 1557 percent=self._match(TokenType.PERCENT), 1558 ) 1559 1560 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1561 1562 def _parse_datablocksize( 1563 self, 1564 default: t.Optional[bool] = None, 1565 minimum: t.Optional[bool] = None, 1566 maximum: t.Optional[bool] = None, 1567 ) -> exp.DataBlocksizeProperty: 1568 self._match(TokenType.EQ) 1569 size = self._parse_number() 1570 1571 units = None 1572 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1573 units = self._prev.text 1574 1575 return self.expression( 1576 exp.DataBlocksizeProperty, 1577 size=size, 1578 units=units, 1579 default=default, 1580 minimum=minimum, 1581 maximum=maximum, 1582 ) 1583 1584 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1585 self._match(TokenType.EQ) 1586 always = self._match_text_seq("ALWAYS") 1587 manual = self._match_text_seq("MANUAL") 1588 never = self._match_text_seq("NEVER") 1589 default = 
self._match_text_seq("DEFAULT") 1590 1591 autotemp = None 1592 if self._match_text_seq("AUTOTEMP"): 1593 autotemp = self._parse_schema() 1594 1595 return self.expression( 1596 exp.BlockCompressionProperty, 1597 always=always, 1598 manual=manual, 1599 never=never, 1600 default=default, 1601 autotemp=autotemp, 1602 ) 1603 1604 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1605 no = self._match_text_seq("NO") 1606 concurrent = self._match_text_seq("CONCURRENT") 1607 self._match_text_seq("ISOLATED", "LOADING") 1608 for_all = self._match_text_seq("FOR", "ALL") 1609 for_insert = self._match_text_seq("FOR", "INSERT") 1610 for_none = self._match_text_seq("FOR", "NONE") 1611 return self.expression( 1612 exp.IsolatedLoadingProperty, 1613 no=no, 1614 concurrent=concurrent, 1615 for_all=for_all, 1616 for_insert=for_insert, 1617 for_none=for_none, 1618 ) 1619 1620 def _parse_locking(self) -> exp.LockingProperty: 1621 if self._match(TokenType.TABLE): 1622 kind = "TABLE" 1623 elif self._match(TokenType.VIEW): 1624 kind = "VIEW" 1625 elif self._match(TokenType.ROW): 1626 kind = "ROW" 1627 elif self._match_text_seq("DATABASE"): 1628 kind = "DATABASE" 1629 else: 1630 kind = None 1631 1632 if kind in ("DATABASE", "TABLE", "VIEW"): 1633 this = self._parse_table_parts() 1634 else: 1635 this = None 1636 1637 if self._match(TokenType.FOR): 1638 for_or_in = "FOR" 1639 elif self._match(TokenType.IN): 1640 for_or_in = "IN" 1641 else: 1642 for_or_in = None 1643 1644 if self._match_text_seq("ACCESS"): 1645 lock_type = "ACCESS" 1646 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1647 lock_type = "EXCLUSIVE" 1648 elif self._match_text_seq("SHARE"): 1649 lock_type = "SHARE" 1650 elif self._match_text_seq("READ"): 1651 lock_type = "READ" 1652 elif self._match_text_seq("WRITE"): 1653 lock_type = "WRITE" 1654 elif self._match_text_seq("CHECKSUM"): 1655 lock_type = "CHECKSUM" 1656 else: 1657 lock_type = None 1658 1659 override = self._match_text_seq("OVERRIDE") 1660 1661 
return self.expression( 1662 exp.LockingProperty, 1663 this=this, 1664 kind=kind, 1665 for_or_in=for_or_in, 1666 lock_type=lock_type, 1667 override=override, 1668 ) 1669 1670 def _parse_partition_by(self) -> t.List[exp.Expression]: 1671 if self._match(TokenType.PARTITION_BY): 1672 return self._parse_csv(self._parse_conjunction) 1673 return [] 1674 1675 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1676 self._match(TokenType.EQ) 1677 return self.expression( 1678 exp.PartitionedByProperty, 1679 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1680 ) 1681 1682 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1683 if self._match_text_seq("AND", "STATISTICS"): 1684 statistics = True 1685 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1686 statistics = False 1687 else: 1688 statistics = None 1689 1690 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1691 1692 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1693 if self._match_text_seq("PRIMARY", "INDEX"): 1694 return exp.NoPrimaryIndexProperty() 1695 return None 1696 1697 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1698 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1699 return exp.OnCommitProperty() 1700 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1701 return exp.OnCommitProperty(delete=True) 1702 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1703 1704 def _parse_distkey(self) -> exp.DistKeyProperty: 1705 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1706 1707 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1708 table = self._parse_table(schema=True) 1709 1710 options = [] 1711 while self._match_texts(("INCLUDING", "EXCLUDING")): 1712 this = self._prev.text.upper() 1713 1714 id_var = self._parse_id_var() 1715 if not id_var: 1716 return None 1717 1718 options.append( 1719 
    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parses Redshift `[COMPOUND] SORTKEY (<cols>)`."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parses `[DEFAULT] CHARACTER SET [=] <name>`."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a UDF RETURNS clause: a scalar type, `TABLE <schema>`, or `TABLE<...>`."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed struct-style table signature: TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parses a DESCRIBE statement."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)

    def _parse_insert(self) -> exp.Insert:
        """Parses an INSERT statement (including INSERT OVERWRITE / DIRECTORY forms)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # sqlite-style INSERT OR ABORT/FAIL/IGNORE/REPLACE/ROLLBACK
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses `ON CONFLICT ...` (postgres) or `ON DUPLICATE KEY ...` (MySQL)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parses a RETURNING clause, or returns None if absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses `FORMAT ...` after a ROW token has already been consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses Hive `ROW FORMAT SERDE ...` or `ROW FORMAT DELIMITED ...`."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each delimiter clause is optional; matched ones contribute a kwarg.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses Hive `LOAD DATA ...`; anything else falls back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parses a DELETE statement."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses an UPDATE statement."""
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parses Spark `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
exp.Uncache: 1952 if not self._match(TokenType.TABLE): 1953 self.raise_error("Expecting TABLE after UNCACHE") 1954 1955 return self.expression( 1956 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1957 ) 1958 1959 def _parse_cache(self) -> exp.Cache: 1960 lazy = self._match_text_seq("LAZY") 1961 self._match(TokenType.TABLE) 1962 table = self._parse_table(schema=True) 1963 1964 options = [] 1965 if self._match_text_seq("OPTIONS"): 1966 self._match_l_paren() 1967 k = self._parse_string() 1968 self._match(TokenType.EQ) 1969 v = self._parse_string() 1970 options = [k, v] 1971 self._match_r_paren() 1972 1973 self._match(TokenType.ALIAS) 1974 return self.expression( 1975 exp.Cache, 1976 this=table, 1977 lazy=lazy, 1978 options=options, 1979 expression=self._parse_select(nested=True), 1980 ) 1981 1982 def _parse_partition(self) -> t.Optional[exp.Partition]: 1983 if not self._match(TokenType.PARTITION): 1984 return None 1985 1986 return self.expression( 1987 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1988 ) 1989 1990 def _parse_value(self) -> exp.Tuple: 1991 if self._match(TokenType.L_PAREN): 1992 expressions = self._parse_csv(self._parse_conjunction) 1993 self._match_r_paren() 1994 return self.expression(exp.Tuple, expressions=expressions) 1995 1996 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1997 # https://prestodb.io/docs/current/sql/values.html 1998 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1999 2000 def _parse_projections(self) -> t.List[exp.Expression]: 2001 return self._parse_expressions() 2002 2003 def _parse_select( 2004 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2005 ) -> t.Optional[exp.Expression]: 2006 cte = self._parse_with() 2007 2008 if cte: 2009 this = self._parse_statement() 2010 2011 if not this: 2012 self.raise_error("Failed to parse any statement following CTE") 2013 return cte 2014 2015 if "with" in this.arg_types: 2016 this.set("with", cte) 2017 else: 2018 self.raise_error(f"{this.key} does not support CTE") 2019 this = cte 2020 2021 return this 2022 2023 # duckdb supports leading with FROM x 2024 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2025 2026 if self._match(TokenType.SELECT): 2027 comments = self._prev_comments 2028 2029 hint = self._parse_hint() 2030 all_ = self._match(TokenType.ALL) 2031 distinct = self._match_set(self.DISTINCT_TOKENS) 2032 2033 kind = ( 2034 self._match(TokenType.ALIAS) 2035 and self._match_texts(("STRUCT", "VALUE")) 2036 and self._prev.text 2037 ) 2038 2039 if distinct: 2040 distinct = self.expression( 2041 exp.Distinct, 2042 on=self._parse_value() if self._match(TokenType.ON) else None, 2043 ) 2044 2045 if all_ and distinct: 2046 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2047 2048 limit = self._parse_limit(top=True) 2049 projections = self._parse_projections() 2050 2051 this = self.expression( 2052 exp.Select, 2053 kind=kind, 2054 hint=hint, 2055 distinct=distinct, 2056 expressions=projections, 2057 limit=limit, 2058 ) 2059 this.comments = comments 2060 2061 into = self._parse_into() 2062 if into: 2063 this.set("into", into) 2064 2065 if not from_: 2066 from_ = self._parse_from() 2067 2068 if from_: 2069 this.set("from", from_) 2070 2071 this = 
self._parse_query_modifiers(this) 2072 elif (table or nested) and self._match(TokenType.L_PAREN): 2073 if self._match(TokenType.PIVOT): 2074 this = self._parse_simplified_pivot() 2075 elif self._match(TokenType.FROM): 2076 this = exp.select("*").from_( 2077 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2078 ) 2079 else: 2080 this = self._parse_table() if table else self._parse_select(nested=True) 2081 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2082 2083 self._match_r_paren() 2084 2085 # We return early here so that the UNION isn't attached to the subquery by the 2086 # following call to _parse_set_operations, but instead becomes the parent node 2087 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2088 elif self._match(TokenType.VALUES): 2089 this = self.expression( 2090 exp.Values, 2091 expressions=self._parse_csv(self._parse_value), 2092 alias=self._parse_table_alias(), 2093 ) 2094 elif from_: 2095 this = exp.select("*").from_(from_.this, copy=False) 2096 else: 2097 this = None 2098 2099 return self._parse_set_operations(this) 2100 2101 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2102 if not skip_with_token and not self._match(TokenType.WITH): 2103 return None 2104 2105 comments = self._prev_comments 2106 recursive = self._match(TokenType.RECURSIVE) 2107 2108 expressions = [] 2109 while True: 2110 expressions.append(self._parse_cte()) 2111 2112 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2113 break 2114 else: 2115 self._match(TokenType.WITH) 2116 2117 return self.expression( 2118 exp.With, comments=comments, expressions=expressions, recursive=recursive 2119 ) 2120 2121 def _parse_cte(self) -> exp.CTE: 2122 alias = self._parse_table_alias() 2123 if not alias or not alias.this: 2124 self.raise_error("Expected CTE to have alias") 2125 2126 self._match(TokenType.ALIAS) 2127 return self.expression( 2128 exp.CTE, 
            this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional [AS] alias, optionally followed by a parenthesized column list.

        Returns None when neither an alias nor a column list is present.
        """
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Only commit to the parenthesized column list if something parsed;
            # otherwise rewind so the "(" can be consumed by the caller.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery, consuming any trailing pivots/alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...) to `this`.

        Non-modifiable expressions are returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            # iter(callable, None) keeps consuming until the parser returns None.
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT x, y form carries its offset inside the Limit
                            # node; hoist it into a proper Offset expression.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
2192 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2193 hints.extend(hint) 2194 2195 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2196 self.raise_error("Expected */ after HINT") 2197 2198 return self.expression(exp.Hint, expressions=hints) 2199 2200 return None 2201 2202 def _parse_into(self) -> t.Optional[exp.Into]: 2203 if not self._match(TokenType.INTO): 2204 return None 2205 2206 temp = self._match(TokenType.TEMPORARY) 2207 unlogged = self._match_text_seq("UNLOGGED") 2208 self._match(TokenType.TABLE) 2209 2210 return self.expression( 2211 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2212 ) 2213 2214 def _parse_from( 2215 self, joins: bool = False, skip_from_token: bool = False 2216 ) -> t.Optional[exp.From]: 2217 if not skip_from_token and not self._match(TokenType.FROM): 2218 return None 2219 2220 return self.expression( 2221 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2222 ) 2223 2224 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2225 if not self._match(TokenType.MATCH_RECOGNIZE): 2226 return None 2227 2228 self._match_l_paren() 2229 2230 partition = self._parse_partition_by() 2231 order = self._parse_order() 2232 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2233 2234 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2235 rows = exp.var("ONE ROW PER MATCH") 2236 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2237 text = "ALL ROWS PER MATCH" 2238 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2239 text += f" SHOW EMPTY MATCHES" 2240 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2241 text += f" OMIT EMPTY MATCHES" 2242 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2243 text += f" WITH UNMATCHED ROWS" 2244 rows = exp.var(text) 2245 else: 2246 rows = None 2247 2248 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2249 text = "AFTER MATCH SKIP" 2250 if 
self._match_text_seq("PAST", "LAST", "ROW"): 2251 text += f" PAST LAST ROW" 2252 elif self._match_text_seq("TO", "NEXT", "ROW"): 2253 text += f" TO NEXT ROW" 2254 elif self._match_text_seq("TO", "FIRST"): 2255 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2256 elif self._match_text_seq("TO", "LAST"): 2257 text += f" TO LAST {self._advance_any().text}" # type: ignore 2258 after = exp.var(text) 2259 else: 2260 after = None 2261 2262 if self._match_text_seq("PATTERN"): 2263 self._match_l_paren() 2264 2265 if not self._curr: 2266 self.raise_error("Expecting )", self._curr) 2267 2268 paren = 1 2269 start = self._curr 2270 2271 while self._curr and paren > 0: 2272 if self._curr.token_type == TokenType.L_PAREN: 2273 paren += 1 2274 if self._curr.token_type == TokenType.R_PAREN: 2275 paren -= 1 2276 2277 end = self._prev 2278 self._advance() 2279 2280 if paren > 0: 2281 self.raise_error("Expecting )", self._curr) 2282 2283 pattern = exp.var(self._find_sql(start, end)) 2284 else: 2285 pattern = None 2286 2287 define = ( 2288 self._parse_csv( 2289 lambda: self.expression( 2290 exp.Alias, 2291 alias=self._parse_id_var(any_token=True), 2292 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2293 ) 2294 ) 2295 if self._match_text_seq("DEFINE") 2296 else None 2297 ) 2298 2299 self._match_r_paren() 2300 2301 return self.expression( 2302 exp.MatchRecognize, 2303 partition_by=partition, 2304 order=order, 2305 measures=measures, 2306 rows=rows, 2307 after=after, 2308 pattern=pattern, 2309 define=define, 2310 alias=self._parse_table_alias(), 2311 ) 2312 2313 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2314 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2315 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2316 2317 if outer_apply or cross_apply: 2318 this = self._parse_select(table=True) 2319 view = None 2320 outer = not cross_apply 2321 elif self._match(TokenType.LATERAL): 2322 this = 
self._parse_select(table=True) 2323 view = self._match(TokenType.VIEW) 2324 outer = self._match(TokenType.OUTER) 2325 else: 2326 return None 2327 2328 if not this: 2329 this = ( 2330 self._parse_unnest() 2331 or self._parse_function() 2332 or self._parse_id_var(any_token=False) 2333 ) 2334 2335 while self._match(TokenType.DOT): 2336 this = exp.Dot( 2337 this=this, 2338 expression=self._parse_function() or self._parse_id_var(any_token=False), 2339 ) 2340 2341 if view: 2342 table = self._parse_id_var(any_token=False) 2343 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2344 table_alias: t.Optional[exp.TableAlias] = self.expression( 2345 exp.TableAlias, this=table, columns=columns 2346 ) 2347 elif isinstance(this, exp.Subquery) and this.alias: 2348 # Ensures parity between the Subquery's and the Lateral's "alias" args 2349 table_alias = this.args["alias"].copy() 2350 else: 2351 table_alias = self._parse_table_alias() 2352 2353 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2354 2355 def _parse_join_parts( 2356 self, 2357 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2358 return ( 2359 self._match_set(self.JOIN_METHODS) and self._prev, 2360 self._match_set(self.JOIN_SIDES) and self._prev, 2361 self._match_set(self.JOIN_KINDS) and self._prev, 2362 ) 2363 2364 def _parse_join( 2365 self, skip_join_token: bool = False, parse_bracket: bool = False 2366 ) -> t.Optional[exp.Join]: 2367 if self._match(TokenType.COMMA): 2368 return self.expression(exp.Join, this=self._parse_table()) 2369 2370 index = self._index 2371 method, side, kind = self._parse_join_parts() 2372 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2373 join = self._match(TokenType.JOIN) 2374 2375 if not skip_join_token and not join: 2376 self._retreat(index) 2377 kind = None 2378 method = None 2379 side = None 2380 2381 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, 
False) 2382 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2383 2384 if not skip_join_token and not join and not outer_apply and not cross_apply: 2385 return None 2386 2387 if outer_apply: 2388 side = Token(TokenType.LEFT, "LEFT") 2389 2390 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2391 2392 if method: 2393 kwargs["method"] = method.text 2394 if side: 2395 kwargs["side"] = side.text 2396 if kind: 2397 kwargs["kind"] = kind.text 2398 if hint: 2399 kwargs["hint"] = hint 2400 2401 if self._match(TokenType.ON): 2402 kwargs["on"] = self._parse_conjunction() 2403 elif self._match(TokenType.USING): 2404 kwargs["using"] = self._parse_wrapped_id_vars() 2405 elif not (kind and kind.token_type == TokenType.CROSS): 2406 index = self._index 2407 joins = self._parse_joins() 2408 2409 if joins and self._match(TokenType.ON): 2410 kwargs["on"] = self._parse_conjunction() 2411 elif joins and self._match(TokenType.USING): 2412 kwargs["using"] = self._parse_wrapped_id_vars() 2413 else: 2414 joins = None 2415 self._retreat(index) 2416 2417 kwargs["this"].set("joins", joins) 2418 2419 comments = [c for token in (method, side, kind) if token for c in token.comments] 2420 return self.expression(exp.Join, comments=comments, **kwargs) 2421 2422 def _parse_index( 2423 self, 2424 index: t.Optional[exp.Expression] = None, 2425 ) -> t.Optional[exp.Index]: 2426 if index: 2427 unique = None 2428 primary = None 2429 amp = None 2430 2431 self._match(TokenType.ON) 2432 self._match(TokenType.TABLE) # hive 2433 table = self._parse_table_parts(schema=True) 2434 else: 2435 unique = self._match(TokenType.UNIQUE) 2436 primary = self._match_text_seq("PRIMARY") 2437 amp = self._match_text_seq("AMP") 2438 2439 if not self._match(TokenType.INDEX): 2440 return None 2441 2442 index = self._parse_id_var() 2443 table = None 2444 2445 using = self._parse_field() if self._match(TokenType.USING) else None 2446 2447 if self._match(TokenType.L_PAREN, 
advance=False): 2448 columns = self._parse_wrapped_csv(self._parse_ordered) 2449 else: 2450 columns = None 2451 2452 return self.expression( 2453 exp.Index, 2454 this=index, 2455 table=table, 2456 using=using, 2457 columns=columns, 2458 unique=unique, 2459 primary=primary, 2460 amp=amp, 2461 partition_by=self._parse_partition_by(), 2462 ) 2463 2464 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2465 hints: t.List[exp.Expression] = [] 2466 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2467 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2468 hints.append( 2469 self.expression( 2470 exp.WithTableHint, 2471 expressions=self._parse_csv( 2472 lambda: self._parse_function() or self._parse_var(any_token=True) 2473 ), 2474 ) 2475 ) 2476 self._match_r_paren() 2477 else: 2478 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2479 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2480 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2481 2482 self._match_texts({"INDEX", "KEY"}) 2483 if self._match(TokenType.FOR): 2484 hint.set("target", self._advance_any() and self._prev.text.upper()) 2485 2486 hint.set("expressions", self._parse_wrapped_id_vars()) 2487 hints.append(hint) 2488 2489 return hints or None 2490 2491 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2492 return ( 2493 (not schema and self._parse_function(optional_parens=False)) 2494 or self._parse_id_var(any_token=False) 2495 or self._parse_string_as_identifier() 2496 or self._parse_placeholder() 2497 ) 2498 2499 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2500 catalog = None 2501 db = None 2502 table = self._parse_table_part(schema=schema) 2503 2504 while self._match(TokenType.DOT): 2505 if catalog: 2506 # This allows nesting the table in arbitrarily many dot expressions if needed 2507 table = self.expression( 2508 exp.Dot, this=table, 
expression=self._parse_table_part(schema=schema) 2509 ) 2510 else: 2511 catalog = db 2512 db = table 2513 table = self._parse_table_part(schema=schema) 2514 2515 if not table: 2516 self.raise_error(f"Expected table name but got {self._curr}") 2517 2518 return self.expression( 2519 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2520 ) 2521 2522 def _parse_table( 2523 self, 2524 schema: bool = False, 2525 joins: bool = False, 2526 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2527 parse_bracket: bool = False, 2528 ) -> t.Optional[exp.Expression]: 2529 lateral = self._parse_lateral() 2530 if lateral: 2531 return lateral 2532 2533 unnest = self._parse_unnest() 2534 if unnest: 2535 return unnest 2536 2537 values = self._parse_derived_table_values() 2538 if values: 2539 return values 2540 2541 subquery = self._parse_select(table=True) 2542 if subquery: 2543 if not subquery.args.get("pivots"): 2544 subquery.set("pivots", self._parse_pivots()) 2545 return subquery 2546 2547 bracket = parse_bracket and self._parse_bracket(None) 2548 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2549 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2550 2551 if schema: 2552 return self._parse_schema(this=this) 2553 2554 version = self._parse_version() 2555 2556 if version: 2557 this.set("version", version) 2558 2559 if self.ALIAS_POST_TABLESAMPLE: 2560 table_sample = self._parse_table_sample() 2561 2562 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2563 if alias: 2564 this.set("alias", alias) 2565 2566 this.set("hints", self._parse_table_hints()) 2567 2568 if not this.args.get("pivots"): 2569 this.set("pivots", self._parse_pivots()) 2570 2571 if not self.ALIAS_POST_TABLESAMPLE: 2572 table_sample = self._parse_table_sample() 2573 2574 if table_sample: 2575 table_sample.set("this", this) 2576 this = table_sample 2577 2578 if joins: 2579 for join in 
iter(self._parse_join, None): 2580 this.append("joins", join) 2581 2582 return this 2583 2584 def _parse_version(self) -> t.Optional[exp.Version]: 2585 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2586 this = "TIMESTAMP" 2587 elif self._match(TokenType.VERSION_SNAPSHOT): 2588 this = "VERSION" 2589 else: 2590 return None 2591 2592 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2593 kind = self._prev.text.upper() 2594 start = self._parse_bitwise() 2595 self._match_texts(("TO", "AND")) 2596 end = self._parse_bitwise() 2597 expression: t.Optional[exp.Expression] = self.expression( 2598 exp.Tuple, expressions=[start, end] 2599 ) 2600 elif self._match_text_seq("CONTAINED", "IN"): 2601 kind = "CONTAINED IN" 2602 expression = self.expression( 2603 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2604 ) 2605 elif self._match(TokenType.ALL): 2606 kind = "ALL" 2607 expression = None 2608 else: 2609 self._match_text_seq("AS", "OF") 2610 kind = "AS OF" 2611 expression = self._parse_type() 2612 2613 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2614 2615 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2616 if not self._match(TokenType.UNNEST): 2617 return None 2618 2619 expressions = self._parse_wrapped_csv(self._parse_type) 2620 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2621 2622 alias = self._parse_table_alias() if with_alias else None 2623 2624 if alias and self.UNNEST_COLUMN_ONLY: 2625 if alias.args.get("columns"): 2626 self.raise_error("Unexpected extra column alias in unnest.") 2627 2628 alias.set("columns", [alias.this]) 2629 alias.set("this", None) 2630 2631 offset = None 2632 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2633 self._match(TokenType.ALIAS) 2634 offset = self._parse_id_var() or exp.to_identifier("offset") 2635 2636 return self.expression( 2637 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 
2638 ) 2639 2640 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2641 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2642 if not is_derived and not self._match(TokenType.VALUES): 2643 return None 2644 2645 expressions = self._parse_csv(self._parse_value) 2646 alias = self._parse_table_alias() 2647 2648 if is_derived: 2649 self._match_r_paren() 2650 2651 return self.expression( 2652 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2653 ) 2654 2655 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2656 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2657 as_modifier and self._match_text_seq("USING", "SAMPLE") 2658 ): 2659 return None 2660 2661 bucket_numerator = None 2662 bucket_denominator = None 2663 bucket_field = None 2664 percent = None 2665 rows = None 2666 size = None 2667 seed = None 2668 2669 kind = ( 2670 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2671 ) 2672 method = self._parse_var(tokens=(TokenType.ROW,)) 2673 2674 self._match(TokenType.L_PAREN) 2675 2676 num = self._parse_number() 2677 2678 if self._match_text_seq("BUCKET"): 2679 bucket_numerator = self._parse_number() 2680 self._match_text_seq("OUT", "OF") 2681 bucket_denominator = bucket_denominator = self._parse_number() 2682 self._match(TokenType.ON) 2683 bucket_field = self._parse_field() 2684 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2685 percent = num 2686 elif self._match(TokenType.ROWS): 2687 rows = num 2688 else: 2689 size = num 2690 2691 self._match(TokenType.R_PAREN) 2692 2693 if self._match(TokenType.L_PAREN): 2694 method = self._parse_var() 2695 seed = self._match(TokenType.COMMA) and self._parse_number() 2696 self._match_r_paren() 2697 elif self._match_texts(("SEED", "REPEATABLE")): 2698 seed = self._parse_wrapped(self._parse_number) 2699 2700 return self.expression( 2701 exp.TableSample, 2702 method=method, 2703 
bucket_numerator=bucket_numerator, 2704 bucket_denominator=bucket_denominator, 2705 bucket_field=bucket_field, 2706 percent=percent, 2707 rows=rows, 2708 size=size, 2709 seed=seed, 2710 kind=kind, 2711 ) 2712 2713 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2714 return list(iter(self._parse_pivot, None)) or None 2715 2716 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2717 return list(iter(self._parse_join, None)) or None 2718 2719 # https://duckdb.org/docs/sql/statements/pivot 2720 def _parse_simplified_pivot(self) -> exp.Pivot: 2721 def _parse_on() -> t.Optional[exp.Expression]: 2722 this = self._parse_bitwise() 2723 return self._parse_in(this) if self._match(TokenType.IN) else this 2724 2725 this = self._parse_table() 2726 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2727 using = self._match(TokenType.USING) and self._parse_csv( 2728 lambda: self._parse_alias(self._parse_function()) 2729 ) 2730 group = self._parse_group() 2731 return self.expression( 2732 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2733 ) 2734 2735 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2736 index = self._index 2737 include_nulls = None 2738 2739 if self._match(TokenType.PIVOT): 2740 unpivot = False 2741 elif self._match(TokenType.UNPIVOT): 2742 unpivot = True 2743 2744 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2745 if self._match_text_seq("INCLUDE", "NULLS"): 2746 include_nulls = True 2747 elif self._match_text_seq("EXCLUDE", "NULLS"): 2748 include_nulls = False 2749 else: 2750 return None 2751 2752 expressions = [] 2753 field = None 2754 2755 if not self._match(TokenType.L_PAREN): 2756 self._retreat(index) 2757 return None 2758 2759 if unpivot: 2760 expressions = self._parse_csv(self._parse_column) 2761 else: 2762 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2763 2764 if not expressions: 2765 self.raise_error("Failed 
to parse PIVOT's aggregation list") 2766 2767 if not self._match(TokenType.FOR): 2768 self.raise_error("Expecting FOR") 2769 2770 value = self._parse_column() 2771 2772 if not self._match(TokenType.IN): 2773 self.raise_error("Expecting IN") 2774 2775 field = self._parse_in(value, alias=True) 2776 2777 self._match_r_paren() 2778 2779 pivot = self.expression( 2780 exp.Pivot, 2781 expressions=expressions, 2782 field=field, 2783 unpivot=unpivot, 2784 include_nulls=include_nulls, 2785 ) 2786 2787 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2788 pivot.set("alias", self._parse_table_alias()) 2789 2790 if not unpivot: 2791 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2792 2793 columns: t.List[exp.Expression] = [] 2794 for fld in pivot.args["field"].expressions: 2795 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2796 for name in names: 2797 if self.PREFIXED_PIVOT_COLUMNS: 2798 name = f"{name}_{field_name}" if name else field_name 2799 else: 2800 name = f"{field_name}_{name}" if name else field_name 2801 2802 columns.append(exp.to_identifier(name)) 2803 2804 pivot.set("columns", columns) 2805 2806 return pivot 2807 2808 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2809 return [agg.alias for agg in aggregations] 2810 2811 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2812 if not skip_where_token and not self._match(TokenType.WHERE): 2813 return None 2814 2815 return self.expression( 2816 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2817 ) 2818 2819 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2820 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2821 return None 2822 2823 elements = defaultdict(list) 2824 2825 if self._match(TokenType.ALL): 2826 return self.expression(exp.Group, all=True) 2827 2828 while True: 2829 
expressions = self._parse_csv(self._parse_conjunction) 2830 if expressions: 2831 elements["expressions"].extend(expressions) 2832 2833 grouping_sets = self._parse_grouping_sets() 2834 if grouping_sets: 2835 elements["grouping_sets"].extend(grouping_sets) 2836 2837 rollup = None 2838 cube = None 2839 totals = None 2840 2841 with_ = self._match(TokenType.WITH) 2842 if self._match(TokenType.ROLLUP): 2843 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2844 elements["rollup"].extend(ensure_list(rollup)) 2845 2846 if self._match(TokenType.CUBE): 2847 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2848 elements["cube"].extend(ensure_list(cube)) 2849 2850 if self._match_text_seq("TOTALS"): 2851 totals = True 2852 elements["totals"] = True # type: ignore 2853 2854 if not (grouping_sets or rollup or cube or totals): 2855 break 2856 2857 return self.expression(exp.Group, **elements) # type: ignore 2858 2859 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2860 if not self._match(TokenType.GROUPING_SETS): 2861 return None 2862 2863 return self._parse_wrapped_csv(self._parse_grouping_set) 2864 2865 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2866 if self._match(TokenType.L_PAREN): 2867 grouping_set = self._parse_csv(self._parse_column) 2868 self._match_r_paren() 2869 return self.expression(exp.Tuple, expressions=grouping_set) 2870 2871 return self._parse_column() 2872 2873 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2874 if not skip_having_token and not self._match(TokenType.HAVING): 2875 return None 2876 return self.expression(exp.Having, this=self._parse_conjunction()) 2877 2878 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2879 if not self._match(TokenType.QUALIFY): 2880 return None 2881 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2882 2883 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2884 if 
skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # Temporarily register a PRIOR parser so it is only recognized inside
        # the CONNECT BY condition.
        # NOTE(review): this mutates a class-level dict; concurrent parses could
        # interleave — confirm single-threaded use.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause wrapping `this`; return `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term: expression [ASC|DESC] [NULLS FIRST|LAST]."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query does not say NULLS FIRST/LAST, derive the dialect's
        # implicit null placement from NULL_ORDERING so transpilation can make
        # it explicit where needed.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
if self._match(TokenType.TOP if top else TokenType.LIMIT): 2942 comments = self._prev_comments 2943 if top: 2944 limit_paren = self._match(TokenType.L_PAREN) 2945 expression = self._parse_number() 2946 2947 if limit_paren: 2948 self._match_r_paren() 2949 else: 2950 expression = self._parse_term() 2951 2952 if self._match(TokenType.COMMA): 2953 offset = expression 2954 expression = self._parse_term() 2955 else: 2956 offset = None 2957 2958 limit_exp = self.expression( 2959 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2960 ) 2961 2962 return limit_exp 2963 2964 if self._match(TokenType.FETCH): 2965 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2966 direction = self._prev.text if direction else "FIRST" 2967 2968 count = self._parse_number() 2969 percent = self._match(TokenType.PERCENT) 2970 2971 self._match_set((TokenType.ROW, TokenType.ROWS)) 2972 2973 only = self._match_text_seq("ONLY") 2974 with_ties = self._match_text_seq("WITH", "TIES") 2975 2976 if only and with_ties: 2977 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2978 2979 return self.expression( 2980 exp.Fetch, 2981 direction=direction, 2982 count=count, 2983 percent=percent, 2984 with_ties=with_ties, 2985 ) 2986 2987 return this 2988 2989 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2990 if not self._match(TokenType.OFFSET): 2991 return this 2992 2993 count = self._parse_term() 2994 self._match_set((TokenType.ROW, TokenType.ROWS)) 2995 return self.expression(exp.Offset, this=this, expression=count) 2996 2997 def _parse_locks(self) -> t.List[exp.Lock]: 2998 locks = [] 2999 while True: 3000 if self._match_text_seq("FOR", "UPDATE"): 3001 update = True 3002 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3003 "LOCK", "IN", "SHARE", "MODE" 3004 ): 3005 update = False 3006 else: 3007 break 3008 3009 expressions = None 3010 if self._match_text_seq("OF"): 3011 
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes NOWAIT (True), WAIT <n> (expression), SKIP LOCKED (False),
            # or no wait behavior at all (None).
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing UNION / EXCEPT / INTERSECT chain onto `this`, recursively."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default; an explicit ALL makes it non-distinct.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # Precedence climbing: AND/OR over equality.
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS NULL, ...), with optional NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this
            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Apply the NOT that preceded the range predicate (e.g. NOT BETWEEN).
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        # Remember the position of the IS token so we can back out entirely
        # if what follows is not a valid IS operand.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS [NOT] DISTINCT FROM is null-safe (in)equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse an IN predicate: IN UNNEST(...), IN (subquery | list), or IN field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes the `query` arg; otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            # e.g. Hive's IN without parens — a bare field reference.
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this:
                       exp.Expression) -> exp.Between:
        # Parse "low AND high" after BETWEEN has already been consumed.
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE 'char' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing to the form INTERVAL '<n>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # No operand after INTERVAL — not an interval literal; rewind fully.
        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            # A two-part string like '5 day' carries its own unit.
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
                    # a "window side"), so give the token back to the stream.
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, ?? coalescing, and << / >> shift pairs, left-associatively."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                # "a ?? b" is treated as COALESCE(a, b).
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive-level operators (+, -, ...) per self.TERM.
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Multiplicative-level operators (*, /, ...) per self.FACTOR.
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator (e.g. -, ~, NOT) or fall through to a typed primary."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a type-prefixed literal (e.g. DATE '2020-01-01'), or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # A dialect may parse TYPE 'literal' specially (e.g. DATE '...').
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast,
                                       this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was a misparse —
                # rewind and treat the whole thing as a column instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the `10` in DECIMAL(10), with an optional trailing var."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY<...>), struct, enum, interval and
        timezone-qualified forms. Returns None (after rewinding) when no type is present.
        """
        index = self._index

        # Teradata prefixes user-defined types with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Possibly a user-defined type given as a plain identifier.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    # The identifier re-tokenizes to a known type keyword.
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all; rewind past the type token.
                self._retreat(index)
                return None

            # TYPE(...) might actually be a function call — resolved further below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket form for nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                # WITHOUT TIME ZONE is the default; only clear the function flag.
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL,
                    expressions=span
                )
            else:
                # INTERVAL <unit> (no span) is an interval expression, not a type.
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a function call: if a string literal
            # follows, this was a parameterized type; otherwise rewind entirely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs denote array types, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct field: `name type` or `name: type`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix onto `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, wrapping a bare identifier in exp.Column."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators: brackets, :: casts, dots, JSON arrows, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # expr::type cast
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type ==
                    TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # a.b.c — shift the qualifiers one level: column becomes table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, `.N` number, or a parenthesized
        expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        # Leading-dot decimal literal: .5 -> 0.5
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple,
                                      expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: no-paren function parsers, no-paren builtin functions,
        dialect-specific FUNCTION_PARSERS, subquery predicates, then known
        functions (validated) or an exp.Anonymous fallback.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows — only no-paren builtins can match here.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                # Preserve the original (unnormalized) name for round-tripping.
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> ...), a DISTINCT arg list, or a plain expression arg."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`."""
        index = self._index

        # First check whether what follows is actually a nested SELECT, in which
        # case there is no schema here. Errors from this probe are discarded.
        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field as a column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name [type] [constraints...]."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # "name AS expr" with no type is a computed column.
        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        # Consume as many trailing column constraints as are present.
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Neither a type nor constraints — just a bare identifier, not a column def.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # With explicit start/increment this is really an identity constraint.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY] (...) options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            # GENERATED ... AS (expr): the parens held an expression, not options.
            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] n constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        """Parse the remainder of a NOT ... constraint (NULL / CASESPECIFIC / FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT <name> ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several clauses (and function-like checks).
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint by dispatching on its leading keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key constraint options (ON DELETE/UPDATE actions, DEFERRABLE, ...)
        as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE, UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES <table> clause (with its key options)."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: columns, REFERENCES target, and ON actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. CASCADE, RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No column list follows: this is a column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts/arrays and {...} struct literals following `this`, recursively."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # [:x] — slice with no start.
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize the index by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:end` completing a slice expression."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        comments = self._prev_comments
        # Optional operand for the "simple" CASE form (CASE x WHEN ...).
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # CASE can take a trailing window specification in some dialects.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call IF(...) or as IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            # No condition followed the IF keyword — rewind and give up.
            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            # NEXT was not the start of NEXT VALUE FOR — give the token back.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma also accepted as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(...)/TRY_CAST(...): `<expr> AS <type>`,
        including CHARACTER SET and FORMAT [AT TIME ZONE ...] clauses.

        Args:
            strict: produce exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -- treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type is really a string-to-date/time
                # conversion, so rewrite it as StrToDate / StrToTime.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Dialects where CONCAT treats NULL as '' -- coalesce each argument.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregates, including the
        WITHIN GROUP (ORDER BY ...) ordered-set form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never compares equal, so use IS NULL explicitly.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may evaluate to NULL at runtime, so
                # also match when both the operand and the search value are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # One [KEY] <key> {:|,} [VALUE] <value> entry of a JSON_OBJECT call.
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL handling, UNIQUE KEYS,
        RETURNING, FORMAT JSON and ENCODING modifiers."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single argument: dialect decides between natural log and LOG.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL full-text search: MATCH (cols) AGAINST ('query' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (<col> <type> [<path>] [AS JSON], ...) clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls; `haystack_first` flips the argument
        order for dialects whose function takes (haystack, needle)."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # 
https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING | TRAILING | BOTH
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <string>): the first expression was the char set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (...), <name> AS (...), ...
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wrap `this` when an IGNORE NULLS / RESPECT NULLS suffix is present.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may trail a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and the OVER window itself.
        With `alias=True`, parse a named-window definition instead."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame clause: UNBOUNDED | CURRENT ROW | <expr>, plus
        # its side keyword (e.g. PRECEDING / FOLLOWING).
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or a parenthesized alias list) after `this`;
        with `explicit=True` the AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # An identifier, or any non-reserved token treated as an identifier.
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume and return the current token if it isn't a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # Parameter reference; braces mark the wrapped form (e.g. ${name}).
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)  # the token turned out not to be a placeholder
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # BigQuery-style SELECT * EXCEPT [(] col, ... [)].
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # BigQuery-style SELECT * REPLACE [(] expr AS col, ... [)].
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list via `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over the binary operators listed in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse `( ... )`; with optional=True the parentheses may be absent.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is one or more VAR tokens joined by spaces.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK, with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # COMMIT AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <action>."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to a generic exp.Command when the
        registered ALTER_PARSERS can't fully consume the statement."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable node if everything was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, None when absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = 
self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)  # not an assignment after all
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        # SET [GLOBAL|SESSION] TRANSACTION <characteristic>, ...
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens: rewind and treat the whole statement as a Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Match the first (possibly multi-word) option and return it as a Var.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remainder of the statement verbatim into an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        # Dictionary property of the form <this>(<kind>[(<key> <value>)...]).
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range clause, e.g. (MIN 0 MAX 100), into an exp.DictRange."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: `min`/`max` intentionally mirror the SQL keywords but shadow builtins.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # Only one bound was given: treat it as MAX and default MIN to 0.
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
        """Parse a comprehension tail: <this> FOR <expr> IN <iterator> [IF <condition>]."""
        expression = self._parse_column()
        self._match(TokenType.IN)
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the token stream against `trie` and return the matching sub-parser.

        On failure the token position is fully restored; on success the matched
        keyword tokens remain consumed and the registered parser is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (and advance, by default) if the current token has `token_type`.

        Any comments attached to the matched token are moved onto `expression`.
        Returns None rather than False on a miss, which is falsy either way.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True (and advance, by default) if the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True (and advance past both, by default) if the next two tokens match."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required '(' or report a parse error via raise_error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ')' or report a parse error via raise_error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True (and advance, by default) if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; fully rewind on failure.

        With advance=False this acts as a pure lookahead: the match result is
        returned but the token position is left untouched.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes into Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes Dot(table, name); an unqualified one is
            # unwrapped to its inner identifier.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references to lambda parameters as bare identifiers/dots.

        Columns whose first part names a lambda variable are replaced in place —
        presumably so they are not later resolved as real table columns (verify
        against callers).
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    # Climb to the outermost Dot wrapping this column and replace it.
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot chain: replace the column node itself,
                    # or rebind the root if the column *is* the root.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
890 def __init__( 891 self, 892 error_level: t.Optional[ErrorLevel] = None, 893 error_message_context: int = 100, 894 max_errors: int = 3, 895 ): 896 self.error_level = error_level or ErrorLevel.IMMEDIATE 897 self.error_message_context = error_message_context 898 self.max_errors = max_errors 899 self._tokenizer = self.TOKENIZER_CLASS() 900 self.reset()
912 def parse( 913 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 914 ) -> t.List[t.Optional[exp.Expression]]: 915 """ 916 Parses a list of tokens and returns a list of syntax trees, one tree 917 per parsed SQL statement. 918 919 Args: 920 raw_tokens: The list of tokens. 921 sql: The original SQL string, used to produce helpful debug messages. 922 923 Returns: 924 The list of the produced syntax trees. 925 """ 926 return self._parse( 927 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 928 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
930 def parse_into( 931 self, 932 expression_types: exp.IntoType, 933 raw_tokens: t.List[Token], 934 sql: t.Optional[str] = None, 935 ) -> t.List[t.Optional[exp.Expression]]: 936 """ 937 Parses a list of tokens into a given Expression type. If a collection of Expression 938 types is given instead, this method will try to parse the token list into each one 939 of them, stopping at the first for which the parsing succeeds. 940 941 Args: 942 expression_types: The expression type(s) to try and parse the token list into. 943 raw_tokens: The list of tokens. 944 sql: The original SQL string, used to produce helpful debug messages. 945 946 Returns: 947 The target Expression. 948 """ 949 errors = [] 950 for expression_type in ensure_list(expression_types): 951 parser = self.EXPRESSION_PARSERS.get(expression_type) 952 if not parser: 953 raise TypeError(f"No parser registered for {expression_type}") 954 955 try: 956 return self._parse(parser, raw_tokens, sql) 957 except ParseError as e: 958 e.errors[0]["into_expression"] = expression_type 959 errors.append(e) 960 961 raise ParseError( 962 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 963 errors=merge_errors(errors), 964 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1001 def check_errors(self) -> None: 1002 """Logs or raises any found errors, depending on the chosen error level setting.""" 1003 if self.error_level == ErrorLevel.WARN: 1004 for error in self.errors: 1005 logger.error(str(error)) 1006 elif self.error_level == ErrorLevel.RAISE and self.errors: 1007 raise ParseError( 1008 concat_messages(self.errors, self.max_errors), 1009 errors=merge_errors(self.errors), 1010 )
Logs or raises any found errors, depending on the chosen error level setting.
1012 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1013 """ 1014 Appends an error in the list of recorded errors or raises it, depending on the chosen 1015 error level setting. 1016 """ 1017 token = token or self._curr or self._prev or Token.string("") 1018 start = token.start 1019 end = token.end + 1 1020 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1021 highlight = self.sql[start:end] 1022 end_context = self.sql[end : end + self.error_message_context] 1023 1024 error = ParseError.new( 1025 f"{message}. Line {token.line}, Col: {token.col}.\n" 1026 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1027 description=message, 1028 line=token.line, 1029 col=token.col, 1030 start_context=start_context, 1031 highlight=highlight, 1032 end_context=end_context, 1033 ) 1034 1035 if self.error_level == ErrorLevel.IMMEDIATE: 1036 raise error 1037 1038 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1040 def expression( 1041 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1042 ) -> E: 1043 """ 1044 Creates a new, validated Expression. 1045 1046 Args: 1047 exp_class: The expression class to instantiate. 1048 comments: An optional list of comments to attach to the expression. 1049 kwargs: The arguments to set for the expression along with their respective values. 1050 1051 Returns: 1052 The target expression. 1053 """ 1054 instance = exp_class(**kwargs) 1055 instance.add_comments(comments) if comments else self._add_comments(instance) 1056 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1063 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1064 """ 1065 Validates an Expression, making sure that all its mandatory arguments are set. 1066 1067 Args: 1068 expression: The expression to validate. 1069 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1070 1071 Returns: 1072 The validated expression. 1073 """ 1074 if self.error_level != ErrorLevel.IGNORE: 1075 for error_message in expression.error_messages(args): 1076 self.raise_error(error_message) 1077 1078 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.