sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps SQL function names to callables that build the matching expression node
    # from a parsed argument list. Dialect subclasses extend/override this table.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Note the swapped operand order: the pattern is args[0], the subject args[1].
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # CAST to TEXT then take the first 10 chars (the "YYYY-MM-DD" prefix).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate rather than a
        # dedicated datetime node — confirm this is intentional.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Types that can wrap other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that introduce a subquery operand; SOME is an alias of ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Database-level objects supported by CREATE/DROP.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens that may also alias a table; join-related keywords excluded
    # so they are not swallowed as aliases.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (function calls).
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator tables used by the precedence-climbing expression parser.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda-syntax parsers: `x -> expr` (Lambda) and `x => expr` (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can directly follow a column expression (cast, JSON access, ...).
    # DOT maps to None because member access is handled inline by the caller.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Used by parse_into to map a target Expression type to its parse method.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Dispatch table for statements, keyed on their leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A statement starting with FROM is treated as SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for primary/literal expressions; each receives the consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # `:name` / `:1` style placeholders; None when the colon isn't followed by one.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, LIKE, ...), keyed on token type.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed on the property's keyword text.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed on the constraint's keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> — only matched when followed by UPDATE.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions that are written without parentheses but need custom parsing.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists use non-standard syntax and need bespoke parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        # iter(callable, None) keeps calling the parser until it returns None,
        # collecting repeated clauses such as multiple JOINs.
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty here; dialects populate this (the metaclass builds SHOW_TRIE from it).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags; subclasses override these as needed.
    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
818 UNNEST_COLUMN_ONLY: bool = False 819 ALIAS_POST_TABLESAMPLE: bool = False 820 STRICT_STRING_CONCAT = False 821 NULL_ORDERING: str = "nulls_are_small" 822 SHOW_TRIE: t.Dict = {} 823 SET_TRIE: t.Dict = {} 824 FORMAT_MAPPING: t.Dict[str, str] = {} 825 FORMAT_TRIE: t.Dict = {} 826 TIME_MAPPING: t.Dict[str, str] = {} 827 TIME_TRIE: t.Dict = {} 828 829 def __init__( 830 self, 831 error_level: t.Optional[ErrorLevel] = None, 832 error_message_context: int = 100, 833 max_errors: int = 3, 834 ): 835 self.error_level = error_level or ErrorLevel.IMMEDIATE 836 self.error_message_context = error_message_context 837 self.max_errors = max_errors 838 self.reset() 839 840 def reset(self): 841 self.sql = "" 842 self.errors = [] 843 self._tokens = [] 844 self._index = 0 845 self._curr = None 846 self._next = None 847 self._prev = None 848 self._prev_comments = None 849 850 def parse( 851 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 852 ) -> t.List[t.Optional[exp.Expression]]: 853 """ 854 Parses a list of tokens and returns a list of syntax trees, one tree 855 per parsed SQL statement. 856 857 Args: 858 raw_tokens: The list of tokens. 859 sql: The original SQL string, used to produce helpful debug messages. 860 861 Returns: 862 The list of the produced syntax trees. 863 """ 864 return self._parse( 865 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 866 ) 867 868 def parse_into( 869 self, 870 expression_types: exp.IntoType, 871 raw_tokens: t.List[Token], 872 sql: t.Optional[str] = None, 873 ) -> t.List[t.Optional[exp.Expression]]: 874 """ 875 Parses a list of tokens into a given Expression type. If a collection of Expression 876 types is given instead, this method will try to parse the token list into each one 877 of them, stopping at the first for which the parsing succeeds. 878 879 Args: 880 expression_types: The expression type(s) to try and parse the token list into. 881 raw_tokens: The list of tokens. 
882 sql: The original SQL string, used to produce helpful debug messages. 883 884 Returns: 885 The target Expression. 886 """ 887 errors = [] 888 for expression_type in ensure_list(expression_types): 889 parser = self.EXPRESSION_PARSERS.get(expression_type) 890 if not parser: 891 raise TypeError(f"No parser registered for {expression_type}") 892 893 try: 894 return self._parse(parser, raw_tokens, sql) 895 except ParseError as e: 896 e.errors[0]["into_expression"] = expression_type 897 errors.append(e) 898 899 raise ParseError( 900 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 901 errors=merge_errors(errors), 902 ) from errors[-1] 903 904 def _parse( 905 self, 906 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 907 raw_tokens: t.List[Token], 908 sql: t.Optional[str] = None, 909 ) -> t.List[t.Optional[exp.Expression]]: 910 self.reset() 911 self.sql = sql or "" 912 913 total = len(raw_tokens) 914 chunks: t.List[t.List[Token]] = [[]] 915 916 for i, token in enumerate(raw_tokens): 917 if token.token_type == TokenType.SEMICOLON: 918 if i < total - 1: 919 chunks.append([]) 920 else: 921 chunks[-1].append(token) 922 923 expressions = [] 924 925 for tokens in chunks: 926 self._index = -1 927 self._tokens = tokens 928 self._advance() 929 930 expressions.append(parse_method(self)) 931 932 if self._index < len(self._tokens): 933 self.raise_error("Invalid expression / Unexpected token") 934 935 self.check_errors() 936 937 return expressions 938 939 def check_errors(self) -> None: 940 """Logs or raises any found errors, depending on the chosen error level setting.""" 941 if self.error_level == ErrorLevel.WARN: 942 for error in self.errors: 943 logger.error(str(error)) 944 elif self.error_level == ErrorLevel.RAISE and self.errors: 945 raise ParseError( 946 concat_messages(self.errors, self.max_errors), 947 errors=merge_errors(self.errors), 948 ) 949 950 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 951 """ 952 
Appends an error in the list of recorded errors or raises it, depending on the chosen 953 error level setting. 954 """ 955 token = token or self._curr or self._prev or Token.string("") 956 start = token.start 957 end = token.end + 1 958 start_context = self.sql[max(start - self.error_message_context, 0) : start] 959 highlight = self.sql[start:end] 960 end_context = self.sql[end : end + self.error_message_context] 961 962 error = ParseError.new( 963 f"{message}. Line {token.line}, Col: {token.col}.\n" 964 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 965 description=message, 966 line=token.line, 967 col=token.col, 968 start_context=start_context, 969 highlight=highlight, 970 end_context=end_context, 971 ) 972 973 if self.error_level == ErrorLevel.IMMEDIATE: 974 raise error 975 976 self.errors.append(error) 977 978 def expression( 979 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 980 ) -> E: 981 """ 982 Creates a new, validated Expression. 983 984 Args: 985 exp_class: The expression class to instantiate. 986 comments: An optional list of comments to attach to the expression. 987 kwargs: The arguments to set for the expression along with their respective values. 988 989 Returns: 990 The target expression. 991 """ 992 instance = exp_class(**kwargs) 993 instance.add_comments(comments) if comments else self._add_comments(instance) 994 return self.validate_expression(instance) 995 996 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 997 if expression and self._prev_comments: 998 expression.add_comments(self._prev_comments) 999 self._prev_comments = None 1000 1001 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1002 """ 1003 Validates an Expression, making sure that all its mandatory arguments are set. 1004 1005 Args: 1006 expression: The expression to validate. 1007 args: An optional list of items that was used to instantiate the expression, if it's a Func. 

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Move the token cursor forward by `times` and refresh the lookahead/lookbehind state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or advance) to an absolute token index; no-op if already there.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous keyword plus the rest of the statement as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse `COMMENT [IF EXISTS] ON <kind> <target> IS '<text>'`."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: degrade gracefully to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level statement dispatcher: keyword-driven parsers, then commands, then expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parse DROP [TEMPORARY] [MATERIALIZED] <kind> ...; falls back to a raw command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement (table, view, index, function, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several locations; accumulate them all.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and
                self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a property that may carry leading modifiers (NO/DUAL/BEFORE/...)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass truthy modifiers; parsers that don't accept a given
                # keyword raise TypeError, which we surface as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property in a CREATE/ALTER property list."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment (key may be an identifier or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS <format> (optionally with INPUTFORMAT/OUTPUTFORMAT strings)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # `KEY = value` / `KEY AS value`; both separators are optional.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into an exp.Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: after CREATE-ish tokens it's a table property,
        # otherwise it describes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parse the various WITH ... property forms (wrapped list, JOURNAL, DATA, isolated loading)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # on stays None when neither ON nor OFF is present.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back off the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: target kind, optional target, FOR/IN, and lock type."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target; the others may name one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns an empty list (not None) when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # statistics stays None when no AND [NO] STATISTICS suffix is given.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option>]* in a CREATE statement."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either a scalar type or a TABLE (schema) signature."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> form
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT [OVERWRITE] [LOCAL] into a table or DIRECTORY target."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres-style) or ON DUPLICATE KEY (MySQL-style) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed by the caller; require FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED <options>."""
        if match_row and not self._match_pair(TokenType.ROW,
                                              TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Each option is independent and optional; only matched ones are set.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive-style LOAD DATA [LOCAL] INPATH ...; otherwise fall back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # DELETE [FROM] <table> [USING ...] [WHERE ...] [RETURNING ...]
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # NOTE(review): only a single key/value pair is consumed here.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row, parenthesized or bare, into an exp.Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1846 # Source: https://prestodb.io/docs/current/sql/values.html 1847 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1848 1849 def _parse_select( 1850 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1851 ) -> t.Optional[exp.Expression]: 1852 cte = self._parse_with() 1853 if cte: 1854 this = self._parse_statement() 1855 1856 if not this: 1857 self.raise_error("Failed to parse any statement following CTE") 1858 return cte 1859 1860 if "with" in this.arg_types: 1861 this.set("with", cte) 1862 else: 1863 self.raise_error(f"{this.key} does not support CTE") 1864 this = cte 1865 elif self._match(TokenType.SELECT): 1866 comments = self._prev_comments 1867 1868 hint = self._parse_hint() 1869 all_ = self._match(TokenType.ALL) 1870 distinct = self._match(TokenType.DISTINCT) 1871 1872 kind = ( 1873 self._match(TokenType.ALIAS) 1874 and self._match_texts(("STRUCT", "VALUE")) 1875 and self._prev.text 1876 ) 1877 1878 if distinct: 1879 distinct = self.expression( 1880 exp.Distinct, 1881 on=self._parse_value() if self._match(TokenType.ON) else None, 1882 ) 1883 1884 if all_ and distinct: 1885 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1886 1887 limit = self._parse_limit(top=True) 1888 expressions = self._parse_csv(self._parse_expression) 1889 1890 this = self.expression( 1891 exp.Select, 1892 kind=kind, 1893 hint=hint, 1894 distinct=distinct, 1895 expressions=expressions, 1896 limit=limit, 1897 ) 1898 this.comments = comments 1899 1900 into = self._parse_into() 1901 if into: 1902 this.set("into", into) 1903 1904 from_ = self._parse_from() 1905 if from_: 1906 this.set("from", from_) 1907 1908 this = self._parse_query_modifiers(this) 1909 elif (table or nested) and self._match(TokenType.L_PAREN): 1910 if self._match(TokenType.PIVOT): 1911 this = self._parse_simplified_pivot() 1912 elif self._match(TokenType.FROM): 1913 this = exp.select("*").from_( 1914 t.cast(exp.From, 
                           self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # A CTE must be named: <alias> [(cols)] AS (<statement>)
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse a table alias, optionally with a parenthesized column list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # NOTE(review): conditional expression used for side effects — if no
            # columns were parsed, rewind past the opening paren.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach query modifiers (joins, where, group, order, limit, ...) to a modifiable node."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT x, y style carries the offset inside the limit node;
                        # lift it out into a proper OFFSET.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Optimizer hints: /*+ ... */ — the tokenizer surfaces the opener as HINT.
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
modifiers: bool = False, skip_from_token: bool = False 2041 ) -> t.Optional[exp.From]: 2042 if not skip_from_token and not self._match(TokenType.FROM): 2043 return None 2044 2045 comments = self._prev_comments 2046 this = self._parse_table() 2047 2048 return self.expression( 2049 exp.From, 2050 comments=comments, 2051 this=self._parse_query_modifiers(this) if modifiers else this, 2052 ) 2053 2054 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2055 if not self._match(TokenType.MATCH_RECOGNIZE): 2056 return None 2057 2058 self._match_l_paren() 2059 2060 partition = self._parse_partition_by() 2061 order = self._parse_order() 2062 measures = ( 2063 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2064 ) 2065 2066 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2067 rows = exp.var("ONE ROW PER MATCH") 2068 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2069 text = "ALL ROWS PER MATCH" 2070 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2071 text += f" SHOW EMPTY MATCHES" 2072 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2073 text += f" OMIT EMPTY MATCHES" 2074 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2075 text += f" WITH UNMATCHED ROWS" 2076 rows = exp.var(text) 2077 else: 2078 rows = None 2079 2080 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2081 text = "AFTER MATCH SKIP" 2082 if self._match_text_seq("PAST", "LAST", "ROW"): 2083 text += f" PAST LAST ROW" 2084 elif self._match_text_seq("TO", "NEXT", "ROW"): 2085 text += f" TO NEXT ROW" 2086 elif self._match_text_seq("TO", "FIRST"): 2087 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2088 elif self._match_text_seq("TO", "LAST"): 2089 text += f" TO LAST {self._advance_any().text}" # type: ignore 2090 after = exp.var(text) 2091 else: 2092 after = None 2093 2094 if self._match_text_seq("PATTERN"): 2095 self._match_l_paren() 2096 2097 if not self._curr: 2098 self.raise_error("Expecting )", 
self._curr) 2099 2100 paren = 1 2101 start = self._curr 2102 2103 while self._curr and paren > 0: 2104 if self._curr.token_type == TokenType.L_PAREN: 2105 paren += 1 2106 if self._curr.token_type == TokenType.R_PAREN: 2107 paren -= 1 2108 2109 end = self._prev 2110 self._advance() 2111 2112 if paren > 0: 2113 self.raise_error("Expecting )", self._curr) 2114 2115 pattern = exp.var(self._find_sql(start, end)) 2116 else: 2117 pattern = None 2118 2119 define = ( 2120 self._parse_csv( 2121 lambda: self.expression( 2122 exp.Alias, 2123 alias=self._parse_id_var(any_token=True), 2124 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2125 ) 2126 ) 2127 if self._match_text_seq("DEFINE") 2128 else None 2129 ) 2130 2131 self._match_r_paren() 2132 2133 return self.expression( 2134 exp.MatchRecognize, 2135 partition_by=partition, 2136 order=order, 2137 measures=measures, 2138 rows=rows, 2139 after=after, 2140 pattern=pattern, 2141 define=define, 2142 alias=self._parse_table_alias(), 2143 ) 2144 2145 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2146 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2147 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2148 2149 if outer_apply or cross_apply: 2150 this = self._parse_select(table=True) 2151 view = None 2152 outer = not cross_apply 2153 elif self._match(TokenType.LATERAL): 2154 this = self._parse_select(table=True) 2155 view = self._match(TokenType.VIEW) 2156 outer = self._match(TokenType.OUTER) 2157 else: 2158 return None 2159 2160 if not this: 2161 this = self._parse_function() or self._parse_id_var(any_token=False) 2162 while self._match(TokenType.DOT): 2163 this = exp.Dot( 2164 this=this, 2165 expression=self._parse_function() or self._parse_id_var(any_token=False), 2166 ) 2167 2168 if view: 2169 table = self._parse_id_var(any_token=False) 2170 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2171 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2172 exp.TableAlias, this=table, columns=columns 2173 ) 2174 elif isinstance(this, exp.Subquery) and this.alias: 2175 # Ensures parity between the Subquery's and the Lateral's "alias" args 2176 table_alias = this.args["alias"].copy() 2177 else: 2178 table_alias = self._parse_table_alias() 2179 2180 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2181 2182 def _parse_join_parts( 2183 self, 2184 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2185 return ( 2186 self._match_set(self.JOIN_METHODS) and self._prev, 2187 self._match_set(self.JOIN_SIDES) and self._prev, 2188 self._match_set(self.JOIN_KINDS) and self._prev, 2189 ) 2190 2191 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2192 if self._match(TokenType.COMMA): 2193 return self.expression(exp.Join, this=self._parse_table()) 2194 2195 index = self._index 2196 method, side, kind = self._parse_join_parts() 2197 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2198 join = self._match(TokenType.JOIN) 2199 2200 if not skip_join_token and not join: 2201 self._retreat(index) 2202 kind = None 2203 method = None 2204 side = None 2205 2206 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2207 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2208 2209 if not skip_join_token and not join and not outer_apply and not cross_apply: 2210 return None 2211 2212 if outer_apply: 2213 side = Token(TokenType.LEFT, "LEFT") 2214 2215 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2216 2217 if method: 2218 kwargs["method"] = method.text 2219 if side: 2220 kwargs["side"] = side.text 2221 if kind: 2222 kwargs["kind"] = kind.text 2223 if hint: 2224 kwargs["hint"] = hint 2225 2226 if self._match(TokenType.ON): 2227 kwargs["on"] = self._parse_conjunction() 2228 elif self._match(TokenType.USING): 2229 kwargs["using"] = self._parse_wrapped_id_vars() 2230 
        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; if `index` is given, only the ON <table> part is parsed."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a dotted table name (function, identifier, string or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name (catalog.db.table, with extra dots nested)."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")
2302 return self.expression( 2303 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2304 ) 2305 2306 def _parse_table( 2307 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2308 ) -> t.Optional[exp.Expression]: 2309 lateral = self._parse_lateral() 2310 if lateral: 2311 return lateral 2312 2313 unnest = self._parse_unnest() 2314 if unnest: 2315 return unnest 2316 2317 values = self._parse_derived_table_values() 2318 if values: 2319 return values 2320 2321 subquery = self._parse_select(table=True) 2322 if subquery: 2323 if not subquery.args.get("pivots"): 2324 subquery.set("pivots", self._parse_pivots()) 2325 return subquery 2326 2327 this: exp.Expression = self._parse_table_parts(schema=schema) 2328 2329 if schema: 2330 return self._parse_schema(this=this) 2331 2332 if self.ALIAS_POST_TABLESAMPLE: 2333 table_sample = self._parse_table_sample() 2334 2335 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2336 if alias: 2337 this.set("alias", alias) 2338 2339 if not this.args.get("pivots"): 2340 this.set("pivots", self._parse_pivots()) 2341 2342 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2343 this.set( 2344 "hints", 2345 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2346 ) 2347 self._match_r_paren() 2348 2349 if not self.ALIAS_POST_TABLESAMPLE: 2350 table_sample = self._parse_table_sample() 2351 2352 if table_sample: 2353 table_sample.set("this", this) 2354 this = table_sample 2355 2356 return this 2357 2358 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2359 if not self._match(TokenType.UNNEST): 2360 return None 2361 2362 expressions = self._parse_wrapped_csv(self._parse_type) 2363 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2364 2365 alias = self._parse_table_alias() if with_alias else None 2366 2367 if alias and self.UNNEST_COLUMN_ONLY: 2368 if 
alias.args.get("columns"): 2369 self.raise_error("Unexpected extra column alias in unnest.") 2370 2371 alias.set("columns", [alias.this]) 2372 alias.set("this", None) 2373 2374 offset = None 2375 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2376 self._match(TokenType.ALIAS) 2377 offset = self._parse_id_var() or exp.to_identifier("offset") 2378 2379 return self.expression( 2380 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2381 ) 2382 2383 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2384 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2385 if not is_derived and not self._match(TokenType.VALUES): 2386 return None 2387 2388 expressions = self._parse_csv(self._parse_value) 2389 alias = self._parse_table_alias() 2390 2391 if is_derived: 2392 self._match_r_paren() 2393 2394 return self.expression( 2395 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2396 ) 2397 2398 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2399 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2400 as_modifier and self._match_text_seq("USING", "SAMPLE") 2401 ): 2402 return None 2403 2404 bucket_numerator = None 2405 bucket_denominator = None 2406 bucket_field = None 2407 percent = None 2408 rows = None 2409 size = None 2410 seed = None 2411 2412 kind = ( 2413 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2414 ) 2415 method = self._parse_var(tokens=(TokenType.ROW,)) 2416 2417 self._match(TokenType.L_PAREN) 2418 2419 num = self._parse_number() 2420 2421 if self._match_text_seq("BUCKET"): 2422 bucket_numerator = self._parse_number() 2423 self._match_text_seq("OUT", "OF") 2424 bucket_denominator = bucket_denominator = self._parse_number() 2425 self._match(TokenType.ON) 2426 bucket_field = self._parse_field() 2427 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2428 percent = num 2429 elif 
self._match(TokenType.ROWS): 2430 rows = num 2431 else: 2432 size = num 2433 2434 self._match(TokenType.R_PAREN) 2435 2436 if self._match(TokenType.L_PAREN): 2437 method = self._parse_var() 2438 seed = self._match(TokenType.COMMA) and self._parse_number() 2439 self._match_r_paren() 2440 elif self._match_texts(("SEED", "REPEATABLE")): 2441 seed = self._parse_wrapped(self._parse_number) 2442 2443 return self.expression( 2444 exp.TableSample, 2445 method=method, 2446 bucket_numerator=bucket_numerator, 2447 bucket_denominator=bucket_denominator, 2448 bucket_field=bucket_field, 2449 percent=percent, 2450 rows=rows, 2451 size=size, 2452 seed=seed, 2453 kind=kind, 2454 ) 2455 2456 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2457 return list(iter(self._parse_pivot, None)) 2458 2459 # https://duckdb.org/docs/sql/statements/pivot 2460 def _parse_simplified_pivot(self) -> exp.Pivot: 2461 def _parse_on() -> t.Optional[exp.Expression]: 2462 this = self._parse_bitwise() 2463 return self._parse_in(this) if self._match(TokenType.IN) else this 2464 2465 this = self._parse_table() 2466 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2467 using = self._match(TokenType.USING) and self._parse_csv( 2468 lambda: self._parse_alias(self._parse_function()) 2469 ) 2470 group = self._parse_group() 2471 return self.expression( 2472 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2473 ) 2474 2475 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2476 index = self._index 2477 2478 if self._match(TokenType.PIVOT): 2479 unpivot = False 2480 elif self._match(TokenType.UNPIVOT): 2481 unpivot = True 2482 else: 2483 return None 2484 2485 expressions = [] 2486 field = None 2487 2488 if not self._match(TokenType.L_PAREN): 2489 self._retreat(index) 2490 return None 2491 2492 if unpivot: 2493 expressions = self._parse_csv(self._parse_column) 2494 else: 2495 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2496 
        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; dialects may override this."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into an exp.Where, or None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop so that plain expressions and grouping constructs can interleave.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; bare ROLLUP takes one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when no ORDER BY is present."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a SORT/CLUSTER/DISTRIBUTE BY style clause keyed by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term, normalizing ASC/DESC and NULLS FIRST/LAST semantics."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's implicit null ordering when none was written out.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or a FETCH clause; returns `this` unchanged if neither is present."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT offset, count
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; returns `this` unchanged if none follow."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Bare UNION defaults to DISTINCT unless ALL was written.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS [NOT] NULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all — rewind to before IS.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: an UNNEST, a list/subquery in parens, or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse a typed expression: an interval, a cast-like `TYPE literal`, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args was probably an identifier — reparse as column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/parameterized forms and array suffixes."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `TYPE[]` — wrap in ARRAY, once per bracket pair.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (brackets, `::` casts, dots) to `this`.

        Loops as long as a COLUMN_OPERATORS token follows, folding each operator
        into the running expression. Returns the (possibly unchanged) expression.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: <expr>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a parser callback take the next token's text as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dotted reference: shift qualifiers one level (column -> table -> db -> catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this
    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current token, if one is present.

        Args:
            functions: Name -> builder mapping; defaults to self.FUNCTIONS.
            anonymous: When True, always build an exp.Anonymous node instead of
                dispatching to a registered parser/builder.
            optional_parens: Whether parenthesis-less functions (e.g. CURRENT_DATE)
                may be recognized.

        Returns:
            The parsed function expression (with any trailing window clause), or
            None if the current token does not start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(", so this can only be a no-paren function like CURRENT_TIMESTAMP.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # skip the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS (SELECT ...) / EXISTS (WITH ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (constraints / column defs) attached to `this`.

        First probes for a nested SELECT — if one parses, `this` is returned as-is
        (the caller handles the query). The probe's errors and position are always
        rolled back so the probe is side-effect free.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Undo the probe regardless of outcome.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3319 3320 def _parse_auto_increment( 3321 self, 3322 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3323 start = None 3324 increment = None 3325 3326 if self._match(TokenType.L_PAREN, advance=False): 3327 args = self._parse_wrapped_csv(self._parse_bitwise) 3328 start = seq_get(args, 0) 3329 increment = seq_get(args, 1) 3330 elif self._match_text_seq("START"): 3331 start = self._parse_bitwise() 3332 self._match_text_seq("INCREMENT") 3333 increment = self._parse_bitwise() 3334 3335 if start and increment: 3336 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3337 3338 return exp.AutoIncrementColumnConstraint() 3339 3340 def _parse_compress(self) -> exp.CompressColumnConstraint: 3341 if self._match(TokenType.L_PAREN, advance=False): 3342 return self.expression( 3343 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3344 ) 3345 3346 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3347 3348 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3349 if self._match_text_seq("BY", "DEFAULT"): 3350 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3351 this = self.expression( 3352 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3353 ) 3354 else: 3355 self._match_text_seq("ALWAYS") 3356 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3357 3358 self._match(TokenType.ALIAS) 3359 identity = self._match_text_seq("IDENTITY") 3360 3361 if self._match(TokenType.L_PAREN): 3362 if self._match_text_seq("START", "WITH"): 3363 this.set("start", self._parse_bitwise()) 3364 if self._match_text_seq("INCREMENT", "BY"): 3365 this.set("increment", self._parse_bitwise()) 3366 if self._match_text_seq("MINVALUE"): 3367 this.set("minvalue", self._parse_bitwise()) 3368 if self._match_text_seq("MAXVALUE"): 3369 
this.set("maxvalue", self._parse_bitwise()) 3370 3371 if self._match_text_seq("CYCLE"): 3372 this.set("cycle", True) 3373 elif self._match_text_seq("NO", "CYCLE"): 3374 this.set("cycle", False) 3375 3376 if not identity: 3377 this.set("expression", self._parse_bitwise()) 3378 3379 self._match_r_paren() 3380 3381 return this 3382 3383 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3384 self._match_text_seq("LENGTH") 3385 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3386 3387 def _parse_not_constraint( 3388 self, 3389 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3390 if self._match_text_seq("NULL"): 3391 return self.expression(exp.NotNullColumnConstraint) 3392 if self._match_text_seq("CASESPECIFIC"): 3393 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3394 return None 3395 3396 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3397 if self._match(TokenType.CONSTRAINT): 3398 this = self._parse_id_var() 3399 else: 3400 this = None 3401 3402 if self._match_texts(self.CONSTRAINT_PARSERS): 3403 return self.expression( 3404 exp.ColumnConstraint, 3405 this=this, 3406 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3407 ) 3408 3409 return this 3410 3411 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3412 if not self._match(TokenType.CONSTRAINT): 3413 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3414 3415 this = self._parse_id_var() 3416 expressions = [] 3417 3418 while True: 3419 constraint = self._parse_unnamed_constraint() or self._parse_function() 3420 if not constraint: 3421 break 3422 expressions.append(constraint) 3423 3424 return self.expression(exp.Constraint, this=this, expressions=expressions) 3425 3426 def _parse_unnamed_constraint( 3427 self, constraints: t.Optional[t.Collection[str]] = None 3428 ) -> t.Optional[exp.Expression]: 3429 if not self._match_texts(constraints or 
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings.

        e.g. "ON DELETE CASCADE", "NOT ENFORCED", "DEFERRABLE". Stops at the
        first token that is not a recognized option.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]...."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" or "update" — becomes the ForeignKey arg name.
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-keyword actions, e.g. CASCADE / RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
expression=self._parse_conjunction()) 3544 ] 3545 else: 3546 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3547 3548 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3549 if bracket_kind == TokenType.L_BRACE: 3550 this = self.expression(exp.Struct, expressions=expressions) 3551 elif not this or this.name.upper() == "ARRAY": 3552 this = self.expression(exp.Array, expressions=expressions) 3553 else: 3554 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3555 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3556 3557 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3558 self.raise_error("Expected ]") 3559 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3560 self.raise_error("Expected }") 3561 3562 self._add_comments(this) 3563 return self._parse_bracket(this) 3564 3565 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3566 if self._match(TokenType.COLON): 3567 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3568 return this 3569 3570 def _parse_case(self) -> t.Optional[exp.Expression]: 3571 ifs = [] 3572 default = None 3573 3574 expression = self._parse_conjunction() 3575 3576 while self._match(TokenType.WHEN): 3577 this = self._parse_conjunction() 3578 self._match(TokenType.THEN) 3579 then = self._parse_conjunction() 3580 ifs.append(self.expression(exp.If, this=this, true=then)) 3581 3582 if self._match(TokenType.ELSE): 3583 default = self._parse_conjunction() 3584 3585 if not self._match(TokenType.END): 3586 self.raise_error("Expected END after CASE", self._prev) 3587 3588 return self._parse_window( 3589 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3590 ) 3591 3592 def _parse_if(self) -> t.Optional[exp.Expression]: 3593 if self._match(TokenType.L_PAREN): 3594 args = self._parse_csv(self._parse_conjunction) 
3595 this = self.validate_expression(exp.If.from_arg_list(args), args) 3596 self._match_r_paren() 3597 else: 3598 index = self._index - 1 3599 condition = self._parse_conjunction() 3600 3601 if not condition: 3602 self._retreat(index) 3603 return None 3604 3605 self._match(TokenType.THEN) 3606 true = self._parse_conjunction() 3607 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3608 self._match(TokenType.END) 3609 this = self.expression(exp.If, this=condition, true=true, false=false) 3610 3611 return self._parse_window(this) 3612 3613 def _parse_extract(self) -> exp.Extract: 3614 this = self._parse_function() or self._parse_var() or self._parse_type() 3615 3616 if self._match(TokenType.FROM): 3617 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3618 3619 if not self._match(TokenType.COMMA): 3620 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3621 3622 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3623 3624 def _parse_cast(self, strict: bool) -> exp.Expression: 3625 this = self._parse_conjunction() 3626 3627 if not self._match(TokenType.ALIAS): 3628 if self._match(TokenType.COMMA): 3629 return self.expression( 3630 exp.CastToStrType, this=this, expression=self._parse_string() 3631 ) 3632 else: 3633 self.raise_error("Expected AS after CAST") 3634 3635 to = self._parse_types() 3636 3637 if not to: 3638 self.raise_error("Expected TYPE after CAST") 3639 elif to.this == exp.DataType.Type.CHAR: 3640 if self._match(TokenType.CHARACTER_SET): 3641 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3642 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3643 fmt = self._parse_string() 3644 3645 return self.expression( 3646 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3647 this=this, 3648 format=exp.Literal.string( 3649 format_time( 3650 fmt.this if fmt else "", 3651 self.FORMAT_MAPPING 
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse the argument list of CONCAT(...), normalizing NULL handling per dialect."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Dialects where CONCAT treats NULL as '' — make that explicit with COALESCE.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )
3696 if not self._match_text_seq("WITHIN", "GROUP"): 3697 self._retreat(index) 3698 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3699 3700 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3701 order = self._parse_order(this=expression) 3702 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3703 3704 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3705 to: t.Optional[exp.Expression] 3706 this = self._parse_bitwise() 3707 3708 if self._match(TokenType.USING): 3709 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3710 elif self._match(TokenType.COMMA): 3711 to = self._parse_bitwise() 3712 else: 3713 to = None 3714 3715 # Swap the argument order if needed to produce the correct AST 3716 if self.CONVERT_TYPE_FIRST: 3717 this, to = to, this 3718 3719 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3720 3721 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3722 """ 3723 There are generally two variants of the DECODE function: 3724 3725 - DECODE(bin, charset) 3726 - DECODE(expression, search, result [, search, result] ... [, default]) 3727 3728 The second variant will always be parsed into a CASE expression. Note that NULL 3729 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3730 instead of relying on pattern matching. 
3731 """ 3732 args = self._parse_csv(self._parse_conjunction) 3733 3734 if len(args) < 3: 3735 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3736 3737 expression, *expressions = args 3738 if not expression: 3739 return None 3740 3741 ifs = [] 3742 for search, result in zip(expressions[::2], expressions[1::2]): 3743 if not search or not result: 3744 return None 3745 3746 if isinstance(search, exp.Literal): 3747 ifs.append( 3748 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3749 ) 3750 elif isinstance(search, exp.Null): 3751 ifs.append( 3752 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3753 ) 3754 else: 3755 cond = exp.or_( 3756 exp.EQ(this=expression.copy(), expression=search), 3757 exp.and_( 3758 exp.Is(this=expression.copy(), expression=exp.Null()), 3759 exp.Is(this=search.copy(), expression=exp.Null()), 3760 copy=False, 3761 ), 3762 copy=False, 3763 ) 3764 ifs.append(exp.If(this=cond, true=result)) 3765 3766 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3767 3768 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3769 self._match_text_seq("KEY") 3770 key = self._parse_field() 3771 self._match(TokenType.COLON) 3772 self._match_text_seq("VALUE") 3773 value = self._parse_field() 3774 3775 if not key and not value: 3776 return None 3777 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3778 3779 def _parse_json_object(self) -> exp.JSONObject: 3780 star = self._parse_star() 3781 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3782 3783 null_handling = None 3784 if self._match_text_seq("NULL", "ON", "NULL"): 3785 null_handling = "NULL ON NULL" 3786 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3787 null_handling = "ABSENT ON NULL" 3788 3789 unique_keys = None 3790 if self._match_text_seq("WITH", "UNIQUE"): 3791 unique_keys = True 3792 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3793 unique_keys = False 3794 3795 self._match_text_seq("KEYS") 3796 3797 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3798 format_json = self._match_text_seq("FORMAT", "JSON") 3799 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3800 3801 return self.expression( 3802 exp.JSONObject, 3803 expressions=expressions, 3804 null_handling=null_handling, 3805 unique_keys=unique_keys, 3806 return_type=return_type, 3807 format_json=format_json, 3808 encoding=encoding, 3809 ) 3810 3811 def _parse_logarithm(self) -> exp.Func: 3812 # Default argument order is base, expression 3813 args = self._parse_csv(self._parse_range) 3814 3815 if len(args) > 1: 3816 if not self.LOG_BASE_FIRST: 3817 args.reverse() 3818 return exp.Log.from_arg_list(args) 3819 3820 return self.expression( 3821 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3822 ) 3823 3824 def _parse_match_against(self) -> exp.MatchAgainst: 3825 expressions = self._parse_csv(self._parse_column) 3826 3827 self._match_text_seq(")", "AGAINST", "(") 3828 3829 this = self._parse_string() 3830 3831 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3832 modifier = "IN NATURAL LANGUAGE MODE" 3833 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3834 modifier = f"{modifier} WITH QUERY EXPANSION" 3835 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3836 modifier = "IN BOOLEAN MODE" 3837 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3838 modifier = "WITH QUERY EXPANSION" 3839 else: 3840 modifier = None 3841 3842 return self.expression( 3843 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3844 ) 3845 3846 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3847 def _parse_open_json(self) -> exp.OpenJSON: 3848 this = self._parse_bitwise() 3849 path = self._match(TokenType.COMMA) and self._parse_string() 3850 3851 def 
    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated function form.

        Args:
            haystack_first: Whether this dialect passes the haystack as the first
                comma-separated argument.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # Standard form: POSITION(substr IN string)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3907 3908 position = None 3909 collation = None 3910 3911 if self._match_texts(self.TRIM_TYPES): 3912 position = self._prev.text.upper() 3913 3914 expression = self._parse_bitwise() 3915 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3916 this = self._parse_bitwise() 3917 else: 3918 this = expression 3919 expression = None 3920 3921 if self._match(TokenType.COLLATE): 3922 collation = self._parse_bitwise() 3923 3924 return self.expression( 3925 exp.Trim, this=this, position=position, expression=expression, collation=collation 3926 ) 3927 3928 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3929 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3930 3931 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3932 return self._parse_window(self._parse_id_var(), alias=True) 3933 3934 def _parse_respect_or_ignore_nulls( 3935 self, this: t.Optional[exp.Expression] 3936 ) -> t.Optional[exp.Expression]: 3937 if self._match_text_seq("IGNORE", "NULLS"): 3938 return self.expression(exp.IgnoreNulls, this=this) 3939 if self._match_text_seq("RESPECT", "NULLS"): 3940 return self.expression(exp.RespectNulls, this=this) 3941 return this 3942 3943 def _parse_window( 3944 self, this: t.Optional[exp.Expression], alias: bool = False 3945 ) -> t.Optional[exp.Expression]: 3946 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3947 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3948 self._match_r_paren() 3949 3950 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3951 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3952 if self._match_text_seq("WITHIN", "GROUP"): 3953 order = self._parse_wrapped(self._parse_order) 3954 this = self.expression(exp.WithinGroup, this=this, expression=order) 3955 3956 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3957 # Some dialects choose to implement and some do not. 3958 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3959 3960 # There is some code above in _parse_lambda that handles 3961 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3962 3963 # The below changes handle 3964 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3965 3966 # Oracle allows both formats 3967 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3968 # and Snowflake chose to do the same for familiarity 3969 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3970 this = self._parse_respect_or_ignore_nulls(this) 3971 3972 # bigquery select from window x AS (partition by ...) 
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one endpoint of a window frame (e.g. "UNBOUNDED PRECEDING")."""
        self._match(TokenType.BETWEEN)

        return {
            # Either a keyword marker (kept as a plain string) or an arbitrary expression.
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # PRECEDING / FOLLOWING, when present.
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
self._match(TokenType.L_PAREN): 4047 aliases = self.expression( 4048 exp.Aliases, 4049 this=this, 4050 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4051 ) 4052 self._match_r_paren(aliases) 4053 return aliases 4054 4055 alias = self._parse_id_var(any_token) 4056 4057 if alias: 4058 return self.expression(exp.Alias, this=this, alias=alias) 4059 4060 return this 4061 4062 def _parse_id_var( 4063 self, 4064 any_token: bool = True, 4065 tokens: t.Optional[t.Collection[TokenType]] = None, 4066 ) -> t.Optional[exp.Expression]: 4067 identifier = self._parse_identifier() 4068 4069 if identifier: 4070 return identifier 4071 4072 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4073 quoted = self._prev.token_type == TokenType.STRING 4074 return exp.Identifier(this=self._prev.text, quoted=quoted) 4075 4076 return None 4077 4078 def _parse_string(self) -> t.Optional[exp.Expression]: 4079 if self._match(TokenType.STRING): 4080 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4081 return self._parse_placeholder() 4082 4083 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4084 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4085 4086 def _parse_number(self) -> t.Optional[exp.Expression]: 4087 if self._match(TokenType.NUMBER): 4088 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4089 return self._parse_placeholder() 4090 4091 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4092 if self._match(TokenType.IDENTIFIER): 4093 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4094 return self._parse_placeholder() 4095 4096 def _parse_var( 4097 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4098 ) -> t.Optional[exp.Expression]: 4099 if ( 4100 (any_token and self._advance_any()) 4101 or self._match(TokenType.VAR) 4102 or (self._match_set(tokens) if tokens else False) 
4103 ): 4104 return self.expression(exp.Var, this=self._prev.text) 4105 return self._parse_placeholder() 4106 4107 def _advance_any(self) -> t.Optional[Token]: 4108 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4109 self._advance() 4110 return self._prev 4111 return None 4112 4113 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4114 return self._parse_var() or self._parse_string() 4115 4116 def _parse_null(self) -> t.Optional[exp.Expression]: 4117 if self._match(TokenType.NULL): 4118 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4119 return None 4120 4121 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4122 if self._match(TokenType.TRUE): 4123 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4124 if self._match(TokenType.FALSE): 4125 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4126 return None 4127 4128 def _parse_star(self) -> t.Optional[exp.Expression]: 4129 if self._match(TokenType.STAR): 4130 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4131 return None 4132 4133 def _parse_parameter(self) -> exp.Parameter: 4134 wrapped = self._match(TokenType.L_BRACE) 4135 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4136 self._match(TokenType.R_BRACE) 4137 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4138 4139 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4140 if self._match_set(self.PLACEHOLDER_PARSERS): 4141 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4142 if placeholder: 4143 return placeholder 4144 self._advance(-1) 4145 return None 4146 4147 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4148 if not self._match(TokenType.EXCEPT): 4149 return None 4150 if self._match(TokenType.L_PAREN, advance=False): 4151 return self._parse_wrapped_csv(self._parse_column) 4152 return self._parse_csv(self._parse_column) 4153 4154 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4155 if not self._match(TokenType.REPLACE): 4156 return None 4157 if self._match(TokenType.L_PAREN, advance=False): 4158 return self._parse_wrapped_csv(self._parse_expression) 4159 return self._parse_csv(self._parse_expression) 4160 4161 def _parse_csv( 4162 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4163 ) -> t.List[t.Optional[exp.Expression]]: 4164 parse_result = parse_method() 4165 items = [parse_result] if parse_result is not None else [] 4166 4167 while self._match(sep): 4168 self._add_comments(parse_result) 4169 parse_result = parse_method() 4170 if parse_result is not None: 4171 items.append(parse_result) 4172 4173 return items 4174 4175 def _parse_tokens( 4176 self, parse_method: t.Callable, expressions: t.Dict 4177 ) -> t.Optional[exp.Expression]: 4178 this = parse_method() 4179 4180 while self._match_set(expressions): 4181 this = self.expression( 4182 expressions[self._prev.token_type], 4183 this=this, 4184 comments=self._prev_comments, 4185 expression=parse_method(), 4186 ) 4187 4188 return this 4189 4190 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4191 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4192 4193 def _parse_wrapped_csv( 4194 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4195 ) -> t.List[t.Optional[exp.Expression]]: 4196 return self._parse_wrapped( 4197 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4198 ) 4199 4200 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4201 wrapped = self._match(TokenType.L_PAREN) 4202 if not wrapped and not optional: 4203 self.raise_error("Expecting (") 4204 parse_result = parse_method() 4205 if wrapped: 4206 self._match_r_paren() 4207 return parse_result 4208 4209 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4210 return 
self._parse_select() or self._parse_set_operations( 4211 self._parse_expression() if alias else self._parse_conjunction() 4212 ) 4213 4214 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4215 return self._parse_query_modifiers( 4216 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4217 ) 4218 4219 def _parse_transaction(self) -> exp.Transaction: 4220 this = None 4221 if self._match_texts(self.TRANSACTION_KIND): 4222 this = self._prev.text 4223 4224 self._match_texts({"TRANSACTION", "WORK"}) 4225 4226 modes = [] 4227 while True: 4228 mode = [] 4229 while self._match(TokenType.VAR): 4230 mode.append(self._prev.text) 4231 4232 if mode: 4233 modes.append(" ".join(mode)) 4234 if not self._match(TokenType.COMMA): 4235 break 4236 4237 return self.expression(exp.Transaction, this=this, modes=modes) 4238 4239 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4240 chain = None 4241 savepoint = None 4242 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4243 4244 self._match_texts({"TRANSACTION", "WORK"}) 4245 4246 if self._match_text_seq("TO"): 4247 self._match_text_seq("SAVEPOINT") 4248 savepoint = self._parse_id_var() 4249 4250 if self._match(TokenType.AND): 4251 chain = not self._match_text_seq("NO") 4252 self._match_text_seq("CHAIN") 4253 4254 if is_rollback: 4255 return self.expression(exp.Rollback, savepoint=savepoint) 4256 4257 return self.expression(exp.Commit, chain=chain) 4258 4259 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4260 if not self._match_text_seq("ADD"): 4261 return None 4262 4263 self._match(TokenType.COLUMN) 4264 exists_column = self._parse_exists(not_=True) 4265 expression = self._parse_column_def(self._parse_field(any_token=True)) 4266 4267 if expression: 4268 expression.set("exists", exists_column) 4269 4270 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4271 if self._match_texts(("FIRST", "AFTER")): 4272 position 
= self._prev.text 4273 column_position = self.expression( 4274 exp.ColumnPosition, this=self._parse_column(), position=position 4275 ) 4276 expression.set("position", column_position) 4277 4278 return expression 4279 4280 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4281 drop = self._match(TokenType.DROP) and self._parse_drop() 4282 if drop and not isinstance(drop, exp.Command): 4283 drop.set("kind", drop.args.get("kind", "COLUMN")) 4284 return drop 4285 4286 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4287 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4288 return self.expression( 4289 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4290 ) 4291 4292 def _parse_add_constraint(self) -> exp.AddConstraint: 4293 this = None 4294 kind = self._prev.token_type 4295 4296 if kind == TokenType.CONSTRAINT: 4297 this = self._parse_id_var() 4298 4299 if self._match_text_seq("CHECK"): 4300 expression = self._parse_wrapped(self._parse_conjunction) 4301 enforced = self._match_text_seq("ENFORCED") 4302 4303 return self.expression( 4304 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4305 ) 4306 4307 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4308 expression = self._parse_foreign_key() 4309 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4310 expression = self._parse_primary_key() 4311 else: 4312 expression = None 4313 4314 return self.expression(exp.AddConstraint, this=this, expression=expression) 4315 4316 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4317 index = self._index - 1 4318 4319 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4320 return self._parse_csv(self._parse_add_constraint) 4321 4322 self._retreat(index) 4323 return self._parse_csv(self._parse_add_column) 4324 4325 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4326 
self._match(TokenType.COLUMN) 4327 column = self._parse_field(any_token=True) 4328 4329 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4330 return self.expression(exp.AlterColumn, this=column, drop=True) 4331 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4332 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4333 4334 self._match_text_seq("SET", "DATA") 4335 return self.expression( 4336 exp.AlterColumn, 4337 this=column, 4338 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4339 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4340 using=self._match(TokenType.USING) and self._parse_conjunction(), 4341 ) 4342 4343 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4344 index = self._index - 1 4345 4346 partition_exists = self._parse_exists() 4347 if self._match(TokenType.PARTITION, advance=False): 4348 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4349 4350 self._retreat(index) 4351 return self._parse_csv(self._parse_drop_column) 4352 4353 def _parse_alter_table_rename(self) -> exp.RenameTable: 4354 self._match_text_seq("TO") 4355 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4356 4357 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4358 start = self._prev 4359 4360 if not self._match(TokenType.TABLE): 4361 return self._parse_as_command(start) 4362 4363 exists = self._parse_exists() 4364 this = self._parse_table(schema=True) 4365 4366 if self._next: 4367 self._advance() 4368 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4369 4370 if parser: 4371 actions = ensure_list(parser(self)) 4372 4373 if not self._curr: 4374 return self.expression( 4375 exp.AlterTable, 4376 this=this, 4377 exists=exists, 4378 actions=actions, 4379 ) 4380 return self._parse_as_command(start) 4381 4382 def _parse_merge(self) -> exp.Merge: 4383 self._match(TokenType.INTO) 4384 target = 
self._parse_table() 4385 4386 self._match(TokenType.USING) 4387 using = self._parse_table() 4388 4389 self._match(TokenType.ON) 4390 on = self._parse_conjunction() 4391 4392 whens = [] 4393 while self._match(TokenType.WHEN): 4394 matched = not self._match(TokenType.NOT) 4395 self._match_text_seq("MATCHED") 4396 source = ( 4397 False 4398 if self._match_text_seq("BY", "TARGET") 4399 else self._match_text_seq("BY", "SOURCE") 4400 ) 4401 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4402 4403 self._match(TokenType.THEN) 4404 4405 if self._match(TokenType.INSERT): 4406 _this = self._parse_star() 4407 if _this: 4408 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4409 else: 4410 then = self.expression( 4411 exp.Insert, 4412 this=self._parse_value(), 4413 expression=self._match(TokenType.VALUES) and self._parse_value(), 4414 ) 4415 elif self._match(TokenType.UPDATE): 4416 expressions = self._parse_star() 4417 if expressions: 4418 then = self.expression(exp.Update, expressions=expressions) 4419 else: 4420 then = self.expression( 4421 exp.Update, 4422 expressions=self._match(TokenType.SET) 4423 and self._parse_csv(self._parse_equality), 4424 ) 4425 elif self._match(TokenType.DELETE): 4426 then = self.expression(exp.Var, this=self._prev.text) 4427 else: 4428 then = None 4429 4430 whens.append( 4431 self.expression( 4432 exp.When, 4433 matched=matched, 4434 source=source, 4435 condition=condition, 4436 then=then, 4437 ) 4438 ) 4439 4440 return self.expression( 4441 exp.Merge, 4442 this=target, 4443 using=using, 4444 on=on, 4445 expressions=whens, 4446 ) 4447 4448 def _parse_show(self) -> t.Optional[exp.Expression]: 4449 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4450 if parser: 4451 return parser(self) 4452 self._advance() 4453 return self.expression(exp.Show, this=self._prev.text.upper()) 4454 4455 def _parse_set_item_assignment( 4456 self, kind: t.Optional[str] = None 4457 ) -> 
t.Optional[exp.Expression]: 4458 index = self._index 4459 4460 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4461 return self._parse_set_transaction(global_=kind == "GLOBAL") 4462 4463 left = self._parse_primary() or self._parse_id_var() 4464 4465 if not self._match_texts(("=", "TO")): 4466 self._retreat(index) 4467 return None 4468 4469 right = self._parse_statement() or self._parse_id_var() 4470 this = self.expression(exp.EQ, this=left, expression=right) 4471 4472 return self.expression(exp.SetItem, this=this, kind=kind) 4473 4474 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4475 self._match_text_seq("TRANSACTION") 4476 characteristics = self._parse_csv( 4477 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4478 ) 4479 return self.expression( 4480 exp.SetItem, 4481 expressions=characteristics, 4482 kind="TRANSACTION", 4483 **{"global": global_}, # type: ignore 4484 ) 4485 4486 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4487 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4488 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4489 4490 def _parse_set(self) -> exp.Set | exp.Command: 4491 index = self._index 4492 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4493 4494 if self._curr: 4495 self._retreat(index) 4496 return self._parse_as_command(self._prev) 4497 4498 return set_ 4499 4500 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4501 for option in options: 4502 if self._match_text_seq(*option.split(" ")): 4503 return exp.var(option) 4504 return None 4505 4506 def _parse_as_command(self, start: Token) -> exp.Command: 4507 while self._curr: 4508 self._advance() 4509 text = self._find_sql(start, self._prev) 4510 size = len(start.text) 4511 return exp.Command(this=text[:size], expression=text[size:]) 4512 4513 def _parse_dict_property(self, this: str) -> 
exp.DictProperty: 4514 settings = [] 4515 4516 self._match_l_paren() 4517 kind = self._parse_id_var() 4518 4519 if self._match(TokenType.L_PAREN): 4520 while True: 4521 key = self._parse_id_var() 4522 value = self._parse_primary() 4523 4524 if not key and value is None: 4525 break 4526 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4527 self._match(TokenType.R_PAREN) 4528 4529 self._match_r_paren() 4530 4531 return self.expression( 4532 exp.DictProperty, 4533 this=this, 4534 kind=kind.this if kind else None, 4535 settings=settings, 4536 ) 4537 4538 def _parse_dict_range(self, this: str) -> exp.DictRange: 4539 self._match_l_paren() 4540 has_min = self._match_text_seq("MIN") 4541 if has_min: 4542 min = self._parse_var() or self._parse_primary() 4543 self._match_text_seq("MAX") 4544 max = self._parse_var() or self._parse_primary() 4545 else: 4546 max = self._parse_var() or self._parse_primary() 4547 min = exp.Literal.number(0) 4548 self._match_r_paren() 4549 return self.expression(exp.DictRange, this=this, min=min, max=max) 4550 4551 def _find_parser( 4552 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4553 ) -> t.Optional[t.Callable]: 4554 if not self._curr: 4555 return None 4556 4557 index = self._index 4558 this = [] 4559 while True: 4560 # The current token might be multiple words 4561 curr = self._curr.text.upper() 4562 key = curr.split(" ") 4563 this.append(curr) 4564 self._advance() 4565 result, trie = in_trie(trie, key) 4566 if result == 0: 4567 break 4568 if result == 2: 4569 subparser = parsers[" ".join(this)] 4570 return subparser 4571 self._retreat(index) 4572 return None 4573 4574 def _match(self, token_type, advance=True, expression=None): 4575 if not self._curr: 4576 return None 4577 4578 if self._curr.token_type == token_type: 4579 if advance: 4580 self._advance() 4581 self._add_comments(expression) 4582 return True 4583 4584 return None 4585 4586 def _match_set(self, types, advance=True): 4587 if not self._curr: 
4588 return None 4589 4590 if self._curr.token_type in types: 4591 if advance: 4592 self._advance() 4593 return True 4594 4595 return None 4596 4597 def _match_pair(self, token_type_a, token_type_b, advance=True): 4598 if not self._curr or not self._next: 4599 return None 4600 4601 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4602 if advance: 4603 self._advance(2) 4604 return True 4605 4606 return None 4607 4608 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4609 if not self._match(TokenType.L_PAREN, expression=expression): 4610 self.raise_error("Expecting (") 4611 4612 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4613 if not self._match(TokenType.R_PAREN, expression=expression): 4614 self.raise_error("Expecting )") 4615 4616 def _match_texts(self, texts, advance=True): 4617 if self._curr and self._curr.text.upper() in texts: 4618 if advance: 4619 self._advance() 4620 return True 4621 return False 4622 4623 def _match_text_seq(self, *texts, advance=True): 4624 index = self._index 4625 for text in texts: 4626 if self._curr and self._curr.text.upper() == text: 4627 self._advance() 4628 else: 4629 self._retreat(index) 4630 return False 4631 4632 if not advance: 4633 self._retreat(index) 4634 4635 return True 4636 4637 @t.overload 4638 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4639 ... 4640 4641 @t.overload 4642 def _replace_columns_with_dots( 4643 self, this: t.Optional[exp.Expression] 4644 ) -> t.Optional[exp.Expression]: 4645 ... 
4646 4647 def _replace_columns_with_dots(self, this): 4648 if isinstance(this, exp.Dot): 4649 exp.replace_children(this, self._replace_columns_with_dots) 4650 elif isinstance(this, exp.Column): 4651 exp.replace_children(this, self._replace_columns_with_dots) 4652 table = this.args.get("table") 4653 this = ( 4654 self.expression(exp.Dot, this=table, expression=this.this) 4655 if table 4656 else self.expression(exp.Var, this=this.name) 4657 ) 4658 elif isinstance(this, exp.Identifier): 4659 this = self.expression(exp.Var, this=this.name) 4660 4661 return this 4662 4663 def _replace_lambda( 4664 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4665 ) -> t.Optional[exp.Expression]: 4666 if not node: 4667 return node 4668 4669 for column in node.find_all(exp.Column): 4670 if column.parts[0].name in lambda_variables: 4671 dot_or_id = column.to_dot() if column.table else column.this 4672 parent = column.parent 4673 4674 while isinstance(parent, exp.Dot): 4675 if not isinstance(parent.parent, exp.Dot): 4676 parent.replace(dot_or_id) 4677 break 4678 parent = parent.parent 4679 else: 4680 if column is node: 4681 node = dot_or_id 4682 else: 4683 column.replace(dot_or_id) 4684 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from an alternating key/value argument list.

    A single star argument yields a StarMap instead. Keys occupy the even
    positions of ``args`` and values the odd ones; an odd-length list raises
    IndexError when the final key's value is read.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for index in range(0, len(args), 2):
        keys.append(args[index])
        values.append(args[index + 1])

    key_array = exp.Array(expressions=keys)
    value_array = exp.Array(expressions=values)
    return exp.VarMap(keys=key_array, values=value_array)
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 
TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 
306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 
TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: 
            self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the token that starts a statement to the parser method for that
    # statement kind; consulted first by _parse_statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A statement that starts with FROM is treated as `SELECT * FROM ...`
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator tokens and how to build their expressions.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary-expression tokens. Each parser receives the matched
    # token (or `_` when the token itself carries no needed payload).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder tokens (e.g. ?, @param, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
        }

    # Range/comparison operators that take an already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keywords (CREATE ... WITH/options) -> parser callables.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keywords -> parser callables (used when parsing a
    # column definition's trailing constraints).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keywords -> parser callables.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a preceding column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are not followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need dedicated (non-generic) parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # SELECT-modifier clauses, keyed by the arg name they populate on the
    # query expression.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scopes/kinds.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hooks: empty in the base parser, overridden by dialects.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can take query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        # Clear all per-parse state so the same instance can parse again.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Remember which target type this attempt was for, then try the next.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core parse loop: split the token stream on semicolons into one
        # chunk per statement, then run parse_method over each chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # ANSI underline escape codes highlight the offending span in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
992 """ 993 instance = exp_class(**kwargs) 994 instance.add_comments(comments) if comments else self._add_comments(instance) 995 return self.validate_expression(instance) 996 997 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 998 if expression and self._prev_comments: 999 expression.add_comments(self._prev_comments) 1000 self._prev_comments = None 1001 1002 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1003 """ 1004 Validates an Expression, making sure that all its mandatory arguments are set. 1005 1006 Args: 1007 expression: The expression to validate. 1008 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1009 1010 Returns: 1011 The validated expression. 1012 """ 1013 if self.error_level != ErrorLevel.IGNORE: 1014 for error_message in expression.error_messages(args): 1015 self.raise_error(error_message) 1016 1017 return expression 1018 1019 def _find_sql(self, start: Token, end: Token) -> str: 1020 return self.sql[start.start : end.end + 1] 1021 1022 def _advance(self, times: int = 1) -> None: 1023 self._index += times 1024 self._curr = seq_get(self._tokens, self._index) 1025 self._next = seq_get(self._tokens, self._index + 1) 1026 1027 if self._index > 0: 1028 self._prev = self._tokens[self._index - 1] 1029 self._prev_comments = self._prev.comments 1030 else: 1031 self._prev = None 1032 self._prev_comments = None 1033 1034 def _retreat(self, index: int) -> None: 1035 if index != self._index: 1036 self._advance(index - self._index) 1037 1038 def _parse_command(self) -> exp.Command: 1039 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1040 1041 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1042 start = self._prev 1043 exists = self._parse_exists() if allow_exists else None 1044 1045 self._match(TokenType.ON) 1046 1047 kind = self._match_set(self.CREATABLES) and self._prev 1048 if not kind: 
1049 return self._parse_as_command(start) 1050 1051 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1052 this = self._parse_user_defined_function(kind=kind.token_type) 1053 elif kind.token_type == TokenType.TABLE: 1054 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1055 elif kind.token_type == TokenType.COLUMN: 1056 this = self._parse_column() 1057 else: 1058 this = self._parse_id_var() 1059 1060 self._match(TokenType.IS) 1061 1062 return self.expression( 1063 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1064 ) 1065 1066 def _parse_to_table( 1067 self, 1068 ) -> exp.ToTableProperty: 1069 table = self._parse_table_parts(schema=True) 1070 return self.expression(exp.ToTableProperty, this=table) 1071 1072 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1073 def _parse_ttl(self) -> exp.Expression: 1074 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1075 this = self._parse_bitwise() 1076 1077 if self._match_text_seq("DELETE"): 1078 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1079 if self._match_text_seq("RECOMPRESS"): 1080 return self.expression( 1081 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1082 ) 1083 if self._match_text_seq("TO", "DISK"): 1084 return self.expression( 1085 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1086 ) 1087 if self._match_text_seq("TO", "VOLUME"): 1088 return self.expression( 1089 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1090 ) 1091 1092 return this 1093 1094 expressions = self._parse_csv(_parse_ttl_action) 1095 where = self._parse_where() 1096 group = self._parse_group() 1097 1098 aggregates = None 1099 if group and self._match(TokenType.SET): 1100 aggregates = self._parse_csv(self._parse_set_item) 1101 1102 return self.expression( 1103 exp.MergeTreeTTL, 1104 expressions=expressions, 1105 where=where, 1106 
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Dispatch on the current token: statement parsers first, then raw
        # commands, then fall back to a plain expression / SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        # DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind — keep the statement as a raw command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns truthy only if the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear in several positions; merge them all into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Try keyword-driven property parsers first, then a few multi-word
        # special cases, and finally a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED [AS] [INPUTFORMAT '...'] [OUTPUTFORMAT '...'] | STORED AS <format>
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Generic `<KEYWORD> [=|AS] <value>` property.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collect consecutive properties into a single exp.Properties node.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: right after CREATE/REPLACE/UNIQUE it is a
        # table property, otherwise it is a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM [=] ON | OFF [DEFAULT]
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY only forms a property when followed by GRANTS; otherwise back off.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # With `=` an explicit ratio follows; otherwise only the NO/DEFAULT
        # modifiers captured by the caller apply.
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
1455 def _parse_datablocksize( 1456 self, 1457 default: t.Optional[bool] = None, 1458 minimum: t.Optional[bool] = None, 1459 maximum: t.Optional[bool] = None, 1460 ) -> exp.DataBlocksizeProperty: 1461 self._match(TokenType.EQ) 1462 size = self._parse_number() 1463 1464 units = None 1465 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1466 units = self._prev.text 1467 1468 return self.expression( 1469 exp.DataBlocksizeProperty, 1470 size=size, 1471 units=units, 1472 default=default, 1473 minimum=minimum, 1474 maximum=maximum, 1475 ) 1476 1477 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1478 self._match(TokenType.EQ) 1479 always = self._match_text_seq("ALWAYS") 1480 manual = self._match_text_seq("MANUAL") 1481 never = self._match_text_seq("NEVER") 1482 default = self._match_text_seq("DEFAULT") 1483 1484 autotemp = None 1485 if self._match_text_seq("AUTOTEMP"): 1486 autotemp = self._parse_schema() 1487 1488 return self.expression( 1489 exp.BlockCompressionProperty, 1490 always=always, 1491 manual=manual, 1492 never=never, 1493 default=default, 1494 autotemp=autotemp, 1495 ) 1496 1497 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1498 no = self._match_text_seq("NO") 1499 concurrent = self._match_text_seq("CONCURRENT") 1500 self._match_text_seq("ISOLATED", "LOADING") 1501 for_all = self._match_text_seq("FOR", "ALL") 1502 for_insert = self._match_text_seq("FOR", "INSERT") 1503 for_none = self._match_text_seq("FOR", "NONE") 1504 return self.expression( 1505 exp.IsolatedLoadingProperty, 1506 no=no, 1507 concurrent=concurrent, 1508 for_all=for_all, 1509 for_insert=for_insert, 1510 for_none=for_none, 1511 ) 1512 1513 def _parse_locking(self) -> exp.LockingProperty: 1514 if self._match(TokenType.TABLE): 1515 kind = "TABLE" 1516 elif self._match(TokenType.VIEW): 1517 kind = "VIEW" 1518 elif self._match(TokenType.ROW): 1519 kind = "ROW" 1520 elif self._match_text_seq("DATABASE"): 1521 kind = "DATABASE" 1522 else: 1523 kind = 
None 1524 1525 if kind in ("DATABASE", "TABLE", "VIEW"): 1526 this = self._parse_table_parts() 1527 else: 1528 this = None 1529 1530 if self._match(TokenType.FOR): 1531 for_or_in = "FOR" 1532 elif self._match(TokenType.IN): 1533 for_or_in = "IN" 1534 else: 1535 for_or_in = None 1536 1537 if self._match_text_seq("ACCESS"): 1538 lock_type = "ACCESS" 1539 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1540 lock_type = "EXCLUSIVE" 1541 elif self._match_text_seq("SHARE"): 1542 lock_type = "SHARE" 1543 elif self._match_text_seq("READ"): 1544 lock_type = "READ" 1545 elif self._match_text_seq("WRITE"): 1546 lock_type = "WRITE" 1547 elif self._match_text_seq("CHECKSUM"): 1548 lock_type = "CHECKSUM" 1549 else: 1550 lock_type = None 1551 1552 override = self._match_text_seq("OVERRIDE") 1553 1554 return self.expression( 1555 exp.LockingProperty, 1556 this=this, 1557 kind=kind, 1558 for_or_in=for_or_in, 1559 lock_type=lock_type, 1560 override=override, 1561 ) 1562 1563 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1564 if self._match(TokenType.PARTITION_BY): 1565 return self._parse_csv(self._parse_conjunction) 1566 return [] 1567 1568 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1569 self._match(TokenType.EQ) 1570 return self.expression( 1571 exp.PartitionedByProperty, 1572 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1573 ) 1574 1575 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1576 if self._match_text_seq("AND", "STATISTICS"): 1577 statistics = True 1578 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1579 statistics = False 1580 else: 1581 statistics = None 1582 1583 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1584 1585 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1586 if self._match_text_seq("PRIMARY", "INDEX"): 1587 return exp.NoPrimaryIndexProperty() 1588 return None 1589 1590 def _parse_on_property(self) -> 
t.Optional[exp.Expression]: 1591 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1592 return exp.OnCommitProperty() 1593 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1594 return exp.OnCommitProperty(delete=True) 1595 return None 1596 1597 def _parse_distkey(self) -> exp.DistKeyProperty: 1598 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1599 1600 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1601 table = self._parse_table(schema=True) 1602 1603 options = [] 1604 while self._match_texts(("INCLUDING", "EXCLUDING")): 1605 this = self._prev.text.upper() 1606 1607 id_var = self._parse_id_var() 1608 if not id_var: 1609 return None 1610 1611 options.append( 1612 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1613 ) 1614 1615 return self.expression(exp.LikeProperty, this=table, expressions=options) 1616 1617 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1618 return self.expression( 1619 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1620 ) 1621 1622 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1623 self._match(TokenType.EQ) 1624 return self.expression( 1625 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1626 ) 1627 1628 def _parse_returns(self) -> exp.ReturnsProperty: 1629 value: t.Optional[exp.Expression] 1630 is_table = self._match(TokenType.TABLE) 1631 1632 if is_table: 1633 if self._match(TokenType.LT): 1634 value = self.expression( 1635 exp.Schema, 1636 this="TABLE", 1637 expressions=self._parse_csv(self._parse_struct_types), 1638 ) 1639 if not self._match(TokenType.GT): 1640 self.raise_error("Expecting >") 1641 else: 1642 value = self._parse_schema(exp.var("TABLE")) 1643 else: 1644 value = self._parse_types() 1645 1646 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1647 1648 def _parse_describe(self) -> 
exp.Describe: 1649 kind = self._match_set(self.CREATABLES) and self._prev.text 1650 this = self._parse_table() 1651 return self.expression(exp.Describe, this=this, kind=kind) 1652 1653 def _parse_insert(self) -> exp.Insert: 1654 overwrite = self._match(TokenType.OVERWRITE) 1655 local = self._match_text_seq("LOCAL") 1656 alternative = None 1657 1658 if self._match_text_seq("DIRECTORY"): 1659 this: t.Optional[exp.Expression] = self.expression( 1660 exp.Directory, 1661 this=self._parse_var_or_string(), 1662 local=local, 1663 row_format=self._parse_row_format(match_row=True), 1664 ) 1665 else: 1666 if self._match(TokenType.OR): 1667 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1668 1669 self._match(TokenType.INTO) 1670 self._match(TokenType.TABLE) 1671 this = self._parse_table(schema=True) 1672 1673 return self.expression( 1674 exp.Insert, 1675 this=this, 1676 exists=self._parse_exists(), 1677 partition=self._parse_partition(), 1678 expression=self._parse_ddl_select(), 1679 conflict=self._parse_on_conflict(), 1680 returning=self._parse_returning(), 1681 overwrite=overwrite, 1682 alternative=alternative, 1683 ) 1684 1685 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1686 conflict = self._match_text_seq("ON", "CONFLICT") 1687 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1688 1689 if not conflict and not duplicate: 1690 return None 1691 1692 nothing = None 1693 expressions = None 1694 key = None 1695 constraint = None 1696 1697 if conflict: 1698 if self._match_text_seq("ON", "CONSTRAINT"): 1699 constraint = self._parse_id_var() 1700 else: 1701 key = self._parse_csv(self._parse_value) 1702 1703 self._match_text_seq("DO") 1704 if self._match_text_seq("NOTHING"): 1705 nothing = True 1706 else: 1707 self._match(TokenType.UPDATE) 1708 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1709 1710 return self.expression( 1711 exp.OnConflict, 1712 duplicate=duplicate, 1713 
expressions=expressions, 1714 nothing=nothing, 1715 key=key, 1716 constraint=constraint, 1717 ) 1718 1719 def _parse_returning(self) -> t.Optional[exp.Returning]: 1720 if not self._match(TokenType.RETURNING): 1721 return None 1722 1723 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1724 1725 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1726 if not self._match(TokenType.FORMAT): 1727 return None 1728 return self._parse_row_format() 1729 1730 def _parse_row_format( 1731 self, match_row: bool = False 1732 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1733 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1734 return None 1735 1736 if self._match_text_seq("SERDE"): 1737 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1738 1739 self._match_text_seq("DELIMITED") 1740 1741 kwargs = {} 1742 1743 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1744 kwargs["fields"] = self._parse_string() 1745 if self._match_text_seq("ESCAPED", "BY"): 1746 kwargs["escaped"] = self._parse_string() 1747 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1748 kwargs["collection_items"] = self._parse_string() 1749 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1750 kwargs["map_keys"] = self._parse_string() 1751 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1752 kwargs["lines"] = self._parse_string() 1753 if self._match_text_seq("NULL", "DEFINED", "AS"): 1754 kwargs["null"] = self._parse_string() 1755 1756 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1757 1758 def _parse_load(self) -> exp.LoadData | exp.Command: 1759 if self._match_text_seq("DATA"): 1760 local = self._match_text_seq("LOCAL") 1761 self._match_text_seq("INPATH") 1762 inpath = self._parse_string() 1763 overwrite = self._match(TokenType.OVERWRITE) 1764 
self._match_pair(TokenType.INTO, TokenType.TABLE) 1765 1766 return self.expression( 1767 exp.LoadData, 1768 this=self._parse_table(schema=True), 1769 local=local, 1770 overwrite=overwrite, 1771 inpath=inpath, 1772 partition=self._parse_partition(), 1773 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1774 serde=self._match_text_seq("SERDE") and self._parse_string(), 1775 ) 1776 return self._parse_as_command(self._prev) 1777 1778 def _parse_delete(self) -> exp.Delete: 1779 self._match(TokenType.FROM) 1780 1781 return self.expression( 1782 exp.Delete, 1783 this=self._parse_table(), 1784 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1785 where=self._parse_where(), 1786 returning=self._parse_returning(), 1787 ) 1788 1789 def _parse_update(self) -> exp.Update: 1790 return self.expression( 1791 exp.Update, 1792 **{ # type: ignore 1793 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1794 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1795 "from": self._parse_from(modifiers=True), 1796 "where": self._parse_where(), 1797 "returning": self._parse_returning(), 1798 }, 1799 ) 1800 1801 def _parse_uncache(self) -> exp.Uncache: 1802 if not self._match(TokenType.TABLE): 1803 self.raise_error("Expecting TABLE after UNCACHE") 1804 1805 return self.expression( 1806 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1807 ) 1808 1809 def _parse_cache(self) -> exp.Cache: 1810 lazy = self._match_text_seq("LAZY") 1811 self._match(TokenType.TABLE) 1812 table = self._parse_table(schema=True) 1813 1814 options = [] 1815 if self._match_text_seq("OPTIONS"): 1816 self._match_l_paren() 1817 k = self._parse_string() 1818 self._match(TokenType.EQ) 1819 v = self._parse_string() 1820 options = [k, v] 1821 self._match_r_paren() 1822 1823 self._match(TokenType.ALIAS) 1824 return self.expression( 1825 exp.Cache, 1826 this=table, 1827 lazy=lazy, 1828 
options=options, 1829 expression=self._parse_select(nested=True), 1830 ) 1831 1832 def _parse_partition(self) -> t.Optional[exp.Partition]: 1833 if not self._match(TokenType.PARTITION): 1834 return None 1835 1836 return self.expression( 1837 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1838 ) 1839 1840 def _parse_value(self) -> exp.Tuple: 1841 if self._match(TokenType.L_PAREN): 1842 expressions = self._parse_csv(self._parse_conjunction) 1843 self._match_r_paren() 1844 return self.expression(exp.Tuple, expressions=expressions) 1845 1846 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1847 # Source: https://prestodb.io/docs/current/sql/values.html 1848 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1849 1850 def _parse_select( 1851 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1852 ) -> t.Optional[exp.Expression]: 1853 cte = self._parse_with() 1854 if cte: 1855 this = self._parse_statement() 1856 1857 if not this: 1858 self.raise_error("Failed to parse any statement following CTE") 1859 return cte 1860 1861 if "with" in this.arg_types: 1862 this.set("with", cte) 1863 else: 1864 self.raise_error(f"{this.key} does not support CTE") 1865 this = cte 1866 elif self._match(TokenType.SELECT): 1867 comments = self._prev_comments 1868 1869 hint = self._parse_hint() 1870 all_ = self._match(TokenType.ALL) 1871 distinct = self._match(TokenType.DISTINCT) 1872 1873 kind = ( 1874 self._match(TokenType.ALIAS) 1875 and self._match_texts(("STRUCT", "VALUE")) 1876 and self._prev.text 1877 ) 1878 1879 if distinct: 1880 distinct = self.expression( 1881 exp.Distinct, 1882 on=self._parse_value() if self._match(TokenType.ON) else None, 1883 ) 1884 1885 if all_ and distinct: 1886 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1887 1888 limit = self._parse_limit(top=True) 1889 expressions = self._parse_csv(self._parse_expression) 1890 1891 this = 
self.expression( 1892 exp.Select, 1893 kind=kind, 1894 hint=hint, 1895 distinct=distinct, 1896 expressions=expressions, 1897 limit=limit, 1898 ) 1899 this.comments = comments 1900 1901 into = self._parse_into() 1902 if into: 1903 this.set("into", into) 1904 1905 from_ = self._parse_from() 1906 if from_: 1907 this.set("from", from_) 1908 1909 this = self._parse_query_modifiers(this) 1910 elif (table or nested) and self._match(TokenType.L_PAREN): 1911 if self._match(TokenType.PIVOT): 1912 this = self._parse_simplified_pivot() 1913 elif self._match(TokenType.FROM): 1914 this = exp.select("*").from_( 1915 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1916 ) 1917 else: 1918 this = self._parse_table() if table else self._parse_select(nested=True) 1919 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1920 1921 self._match_r_paren() 1922 1923 # early return so that subquery unions aren't parsed again 1924 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1925 # Union ALL should be a property of the top select node, not the subquery 1926 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1927 elif self._match(TokenType.VALUES): 1928 this = self.expression( 1929 exp.Values, 1930 expressions=self._parse_csv(self._parse_value), 1931 alias=self._parse_table_alias(), 1932 ) 1933 else: 1934 this = None 1935 1936 return self._parse_set_operations(this) 1937 1938 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1939 if not skip_with_token and not self._match(TokenType.WITH): 1940 return None 1941 1942 comments = self._prev_comments 1943 recursive = self._match(TokenType.RECURSIVE) 1944 1945 expressions = [] 1946 while True: 1947 expressions.append(self._parse_cte()) 1948 1949 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1950 break 1951 else: 1952 self._match(TokenType.WITH) 1953 1954 return self.expression( 1955 exp.With, comments=comments, expressions=expressions, 
recursive=recursive 1956 ) 1957 1958 def _parse_cte(self) -> exp.CTE: 1959 alias = self._parse_table_alias() 1960 if not alias or not alias.this: 1961 self.raise_error("Expected CTE to have alias") 1962 1963 self._match(TokenType.ALIAS) 1964 return self.expression( 1965 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1966 ) 1967 1968 def _parse_table_alias( 1969 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1970 ) -> t.Optional[exp.TableAlias]: 1971 any_token = self._match(TokenType.ALIAS) 1972 alias = ( 1973 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1974 or self._parse_string_as_identifier() 1975 ) 1976 1977 index = self._index 1978 if self._match(TokenType.L_PAREN): 1979 columns = self._parse_csv(self._parse_function_parameter) 1980 self._match_r_paren() if columns else self._retreat(index) 1981 else: 1982 columns = None 1983 1984 if not alias and not columns: 1985 return None 1986 1987 return self.expression(exp.TableAlias, this=alias, columns=columns) 1988 1989 def _parse_subquery( 1990 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1991 ) -> t.Optional[exp.Subquery]: 1992 if not this: 1993 return None 1994 1995 return self.expression( 1996 exp.Subquery, 1997 this=this, 1998 pivots=self._parse_pivots(), 1999 alias=self._parse_table_alias() if parse_alias else None, 2000 ) 2001 2002 def _parse_query_modifiers( 2003 self, this: t.Optional[exp.Expression] 2004 ) -> t.Optional[exp.Expression]: 2005 if isinstance(this, self.MODIFIABLES): 2006 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2007 expression = parser(self) 2008 2009 if expression: 2010 if key == "limit": 2011 offset = expression.args.pop("offset", None) 2012 if offset: 2013 this.set("offset", exp.Offset(expression=offset)) 2014 this.set(key, expression) 2015 return this 2016 2017 def _parse_hint(self) -> t.Optional[exp.Hint]: 2018 if self._match(TokenType.HINT): 2019 hints = 
self._parse_csv(self._parse_function) 2020 2021 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2022 self.raise_error("Expected */ after HINT") 2023 2024 return self.expression(exp.Hint, expressions=hints) 2025 2026 return None 2027 2028 def _parse_into(self) -> t.Optional[exp.Into]: 2029 if not self._match(TokenType.INTO): 2030 return None 2031 2032 temp = self._match(TokenType.TEMPORARY) 2033 unlogged = self._match_text_seq("UNLOGGED") 2034 self._match(TokenType.TABLE) 2035 2036 return self.expression( 2037 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2038 ) 2039 2040 def _parse_from( 2041 self, modifiers: bool = False, skip_from_token: bool = False 2042 ) -> t.Optional[exp.From]: 2043 if not skip_from_token and not self._match(TokenType.FROM): 2044 return None 2045 2046 comments = self._prev_comments 2047 this = self._parse_table() 2048 2049 return self.expression( 2050 exp.From, 2051 comments=comments, 2052 this=self._parse_query_modifiers(this) if modifiers else this, 2053 ) 2054 2055 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2056 if not self._match(TokenType.MATCH_RECOGNIZE): 2057 return None 2058 2059 self._match_l_paren() 2060 2061 partition = self._parse_partition_by() 2062 order = self._parse_order() 2063 measures = ( 2064 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2065 ) 2066 2067 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2068 rows = exp.var("ONE ROW PER MATCH") 2069 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2070 text = "ALL ROWS PER MATCH" 2071 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2072 text += f" SHOW EMPTY MATCHES" 2073 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2074 text += f" OMIT EMPTY MATCHES" 2075 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2076 text += f" WITH UNMATCHED ROWS" 2077 rows = exp.var(text) 2078 else: 2079 rows = None 2080 2081 if 
self._match_text_seq("AFTER", "MATCH", "SKIP"): 2082 text = "AFTER MATCH SKIP" 2083 if self._match_text_seq("PAST", "LAST", "ROW"): 2084 text += f" PAST LAST ROW" 2085 elif self._match_text_seq("TO", "NEXT", "ROW"): 2086 text += f" TO NEXT ROW" 2087 elif self._match_text_seq("TO", "FIRST"): 2088 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2089 elif self._match_text_seq("TO", "LAST"): 2090 text += f" TO LAST {self._advance_any().text}" # type: ignore 2091 after = exp.var(text) 2092 else: 2093 after = None 2094 2095 if self._match_text_seq("PATTERN"): 2096 self._match_l_paren() 2097 2098 if not self._curr: 2099 self.raise_error("Expecting )", self._curr) 2100 2101 paren = 1 2102 start = self._curr 2103 2104 while self._curr and paren > 0: 2105 if self._curr.token_type == TokenType.L_PAREN: 2106 paren += 1 2107 if self._curr.token_type == TokenType.R_PAREN: 2108 paren -= 1 2109 2110 end = self._prev 2111 self._advance() 2112 2113 if paren > 0: 2114 self.raise_error("Expecting )", self._curr) 2115 2116 pattern = exp.var(self._find_sql(start, end)) 2117 else: 2118 pattern = None 2119 2120 define = ( 2121 self._parse_csv( 2122 lambda: self.expression( 2123 exp.Alias, 2124 alias=self._parse_id_var(any_token=True), 2125 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2126 ) 2127 ) 2128 if self._match_text_seq("DEFINE") 2129 else None 2130 ) 2131 2132 self._match_r_paren() 2133 2134 return self.expression( 2135 exp.MatchRecognize, 2136 partition_by=partition, 2137 order=order, 2138 measures=measures, 2139 rows=rows, 2140 after=after, 2141 pattern=pattern, 2142 define=define, 2143 alias=self._parse_table_alias(), 2144 ) 2145 2146 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2147 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2148 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2149 2150 if outer_apply or cross_apply: 2151 this = self._parse_select(table=True) 2152 view = None 2153 outer = not 
cross_apply 2154 elif self._match(TokenType.LATERAL): 2155 this = self._parse_select(table=True) 2156 view = self._match(TokenType.VIEW) 2157 outer = self._match(TokenType.OUTER) 2158 else: 2159 return None 2160 2161 if not this: 2162 this = self._parse_function() or self._parse_id_var(any_token=False) 2163 while self._match(TokenType.DOT): 2164 this = exp.Dot( 2165 this=this, 2166 expression=self._parse_function() or self._parse_id_var(any_token=False), 2167 ) 2168 2169 if view: 2170 table = self._parse_id_var(any_token=False) 2171 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2172 table_alias: t.Optional[exp.TableAlias] = self.expression( 2173 exp.TableAlias, this=table, columns=columns 2174 ) 2175 elif isinstance(this, exp.Subquery) and this.alias: 2176 # Ensures parity between the Subquery's and the Lateral's "alias" args 2177 table_alias = this.args["alias"].copy() 2178 else: 2179 table_alias = self._parse_table_alias() 2180 2181 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2182 2183 def _parse_join_parts( 2184 self, 2185 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2186 return ( 2187 self._match_set(self.JOIN_METHODS) and self._prev, 2188 self._match_set(self.JOIN_SIDES) and self._prev, 2189 self._match_set(self.JOIN_KINDS) and self._prev, 2190 ) 2191 2192 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2193 if self._match(TokenType.COMMA): 2194 return self.expression(exp.Join, this=self._parse_table()) 2195 2196 index = self._index 2197 method, side, kind = self._parse_join_parts() 2198 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2199 join = self._match(TokenType.JOIN) 2200 2201 if not skip_join_token and not join: 2202 self._retreat(index) 2203 kind = None 2204 method = None 2205 side = None 2206 2207 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2208 cross_apply = 
self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2209 2210 if not skip_join_token and not join and not outer_apply and not cross_apply: 2211 return None 2212 2213 if outer_apply: 2214 side = Token(TokenType.LEFT, "LEFT") 2215 2216 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2217 2218 if method: 2219 kwargs["method"] = method.text 2220 if side: 2221 kwargs["side"] = side.text 2222 if kind: 2223 kwargs["kind"] = kind.text 2224 if hint: 2225 kwargs["hint"] = hint 2226 2227 if self._match(TokenType.ON): 2228 kwargs["on"] = self._parse_conjunction() 2229 elif self._match(TokenType.USING): 2230 kwargs["using"] = self._parse_wrapped_id_vars() 2231 2232 return self.expression(exp.Join, **kwargs) 2233 2234 def _parse_index( 2235 self, 2236 index: t.Optional[exp.Expression] = None, 2237 ) -> t.Optional[exp.Index]: 2238 if index: 2239 unique = None 2240 primary = None 2241 amp = None 2242 2243 self._match(TokenType.ON) 2244 self._match(TokenType.TABLE) # hive 2245 table = self._parse_table_parts(schema=True) 2246 else: 2247 unique = self._match(TokenType.UNIQUE) 2248 primary = self._match_text_seq("PRIMARY") 2249 amp = self._match_text_seq("AMP") 2250 2251 if not self._match(TokenType.INDEX): 2252 return None 2253 2254 index = self._parse_id_var() 2255 table = None 2256 2257 using = self._parse_field() if self._match(TokenType.USING) else None 2258 2259 if self._match(TokenType.L_PAREN, advance=False): 2260 columns = self._parse_wrapped_csv(self._parse_ordered) 2261 else: 2262 columns = None 2263 2264 return self.expression( 2265 exp.Index, 2266 this=index, 2267 table=table, 2268 using=using, 2269 columns=columns, 2270 unique=unique, 2271 primary=primary, 2272 amp=amp, 2273 partition_by=self._parse_partition_by(), 2274 ) 2275 2276 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2277 return ( 2278 (not schema and self._parse_function(optional_parens=False)) 2279 or self._parse_id_var(any_token=False) 2280 or 
self._parse_string_as_identifier() 2281 or self._parse_placeholder() 2282 ) 2283 2284 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2285 catalog = None 2286 db = None 2287 table = self._parse_table_part(schema=schema) 2288 2289 while self._match(TokenType.DOT): 2290 if catalog: 2291 # This allows nesting the table in arbitrarily many dot expressions if needed 2292 table = self.expression( 2293 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2294 ) 2295 else: 2296 catalog = db 2297 db = table 2298 table = self._parse_table_part(schema=schema) 2299 2300 if not table: 2301 self.raise_error(f"Expected table name but got {self._curr}") 2302 2303 return self.expression( 2304 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2305 ) 2306 2307 def _parse_table( 2308 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2309 ) -> t.Optional[exp.Expression]: 2310 lateral = self._parse_lateral() 2311 if lateral: 2312 return lateral 2313 2314 unnest = self._parse_unnest() 2315 if unnest: 2316 return unnest 2317 2318 values = self._parse_derived_table_values() 2319 if values: 2320 return values 2321 2322 subquery = self._parse_select(table=True) 2323 if subquery: 2324 if not subquery.args.get("pivots"): 2325 subquery.set("pivots", self._parse_pivots()) 2326 return subquery 2327 2328 this: exp.Expression = self._parse_table_parts(schema=schema) 2329 2330 if schema: 2331 return self._parse_schema(this=this) 2332 2333 if self.ALIAS_POST_TABLESAMPLE: 2334 table_sample = self._parse_table_sample() 2335 2336 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2337 if alias: 2338 this.set("alias", alias) 2339 2340 if not this.args.get("pivots"): 2341 this.set("pivots", self._parse_pivots()) 2342 2343 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2344 this.set( 2345 "hints", 2346 self._parse_csv(lambda: self._parse_function() or 
self._parse_var(any_token=True)), 2347 ) 2348 self._match_r_paren() 2349 2350 if not self.ALIAS_POST_TABLESAMPLE: 2351 table_sample = self._parse_table_sample() 2352 2353 if table_sample: 2354 table_sample.set("this", this) 2355 this = table_sample 2356 2357 return this 2358 2359 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2360 if not self._match(TokenType.UNNEST): 2361 return None 2362 2363 expressions = self._parse_wrapped_csv(self._parse_type) 2364 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2365 2366 alias = self._parse_table_alias() if with_alias else None 2367 2368 if alias and self.UNNEST_COLUMN_ONLY: 2369 if alias.args.get("columns"): 2370 self.raise_error("Unexpected extra column alias in unnest.") 2371 2372 alias.set("columns", [alias.this]) 2373 alias.set("this", None) 2374 2375 offset = None 2376 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2377 self._match(TokenType.ALIAS) 2378 offset = self._parse_id_var() or exp.to_identifier("offset") 2379 2380 return self.expression( 2381 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2382 ) 2383 2384 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2385 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2386 if not is_derived and not self._match(TokenType.VALUES): 2387 return None 2388 2389 expressions = self._parse_csv(self._parse_value) 2390 alias = self._parse_table_alias() 2391 2392 if is_derived: 2393 self._match_r_paren() 2394 2395 return self.expression( 2396 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2397 ) 2398 2399 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2400 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2401 as_modifier and self._match_text_seq("USING", "SAMPLE") 2402 ): 2403 return None 2404 2405 bucket_numerator = None 2406 bucket_denominator = None 2407 bucket_field = None 
2408 percent = None 2409 rows = None 2410 size = None 2411 seed = None 2412 2413 kind = ( 2414 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2415 ) 2416 method = self._parse_var(tokens=(TokenType.ROW,)) 2417 2418 self._match(TokenType.L_PAREN) 2419 2420 num = self._parse_number() 2421 2422 if self._match_text_seq("BUCKET"): 2423 bucket_numerator = self._parse_number() 2424 self._match_text_seq("OUT", "OF") 2425 bucket_denominator = bucket_denominator = self._parse_number() 2426 self._match(TokenType.ON) 2427 bucket_field = self._parse_field() 2428 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2429 percent = num 2430 elif self._match(TokenType.ROWS): 2431 rows = num 2432 else: 2433 size = num 2434 2435 self._match(TokenType.R_PAREN) 2436 2437 if self._match(TokenType.L_PAREN): 2438 method = self._parse_var() 2439 seed = self._match(TokenType.COMMA) and self._parse_number() 2440 self._match_r_paren() 2441 elif self._match_texts(("SEED", "REPEATABLE")): 2442 seed = self._parse_wrapped(self._parse_number) 2443 2444 return self.expression( 2445 exp.TableSample, 2446 method=method, 2447 bucket_numerator=bucket_numerator, 2448 bucket_denominator=bucket_denominator, 2449 bucket_field=bucket_field, 2450 percent=percent, 2451 rows=rows, 2452 size=size, 2453 seed=seed, 2454 kind=kind, 2455 ) 2456 2457 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2458 return list(iter(self._parse_pivot, None)) 2459 2460 # https://duckdb.org/docs/sql/statements/pivot 2461 def _parse_simplified_pivot(self) -> exp.Pivot: 2462 def _parse_on() -> t.Optional[exp.Expression]: 2463 this = self._parse_bitwise() 2464 return self._parse_in(this) if self._match(TokenType.IN) else this 2465 2466 this = self._parse_table() 2467 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2468 using = self._match(TokenType.USING) and self._parse_csv( 2469 lambda: self._parse_alias(self._parse_function()) 2470 ) 2471 group = 
self._parse_group() 2472 return self.expression( 2473 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2474 ) 2475 2476 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2477 index = self._index 2478 2479 if self._match(TokenType.PIVOT): 2480 unpivot = False 2481 elif self._match(TokenType.UNPIVOT): 2482 unpivot = True 2483 else: 2484 return None 2485 2486 expressions = [] 2487 field = None 2488 2489 if not self._match(TokenType.L_PAREN): 2490 self._retreat(index) 2491 return None 2492 2493 if unpivot: 2494 expressions = self._parse_csv(self._parse_column) 2495 else: 2496 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2497 2498 if not expressions: 2499 self.raise_error("Failed to parse PIVOT's aggregation list") 2500 2501 if not self._match(TokenType.FOR): 2502 self.raise_error("Expecting FOR") 2503 2504 value = self._parse_column() 2505 2506 if not self._match(TokenType.IN): 2507 self.raise_error("Expecting IN") 2508 2509 field = self._parse_in(value, alias=True) 2510 2511 self._match_r_paren() 2512 2513 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2514 2515 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2516 pivot.set("alias", self._parse_table_alias()) 2517 2518 if not unpivot: 2519 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2520 2521 columns: t.List[exp.Expression] = [] 2522 for fld in pivot.args["field"].expressions: 2523 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2524 for name in names: 2525 if self.PREFIXED_PIVOT_COLUMNS: 2526 name = f"{name}_{field_name}" if name else field_name 2527 else: 2528 name = f"{field_name}_{name}" if name else field_name 2529 2530 columns.append(exp.to_identifier(name)) 2531 2532 pivot.set("columns", columns) 2533 2534 return pivot 2535 2536 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2537 
        # Default: pivot column names are the aliases of the aggregations.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` means WHERE was already consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with any mix of expressions, GROUPING SETS, ROLLUP,
        CUBE and WITH TOTALS, accumulating each kind into `elements`."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP(...) does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping as long as another grouping modifier was consumed.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...); returns None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one member of a GROUPING SETS list: a tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; returns None if the keyword is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; returns None if the keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged if the keyword is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) keyed by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term, normalizing ASC/DESC and NULLS FIRST/LAST
        according to the dialect's NULL_ORDERING setting."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # If the query didn't spell out a null ordering, infer the dialect default.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is set), including the MySQL-style
        `LIMIT offset, count` form, or an ANSI FETCH clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT a, b: the first term is actually the offset.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...),
        handling an optional leading NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS [NOT] NULL/TRUE/FALSE,
        IS [NOT] DISTINCT FROM); backtracks if nothing valid follows."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), a (sub)query or
        expression list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal; returns None if the keyword is absent."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse a possibly type-prefixed expression: an INTERVAL, a typed
        literal / cast (e.g. DATE '2020-01-01'), or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: treat it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the `10` or `10 CHAR` in VARCHAR(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), parameterized,
        bracketed-array and timestamp-with-timezone forms. `check_func` makes the
        parse backtrack when the "type" is more plausibly a function call."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might actually be a function call with this name.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # T[][]... : wrap in one ARRAY level per bracket pair.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this is subscripting, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # TYPE(...) not followed by a string literal is taken to be a
            # function call, so undo the type parse entirely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name [:] type`, as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any dot/bracket/cast operators on it."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (::cast, dot access, JSON operators,
        bracket subscripts) repeatedly to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift table -> db -> catalog as the dotted path grows.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit string
        concatenation), a leading-dot number, or a parenthesized
        expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to dialect-specific parsers,
        no-paren functions, subquery predicates, or a generic/anonymous call."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume both the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build its typed node and validate arg counts.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (x -> ..., (x, y) -> ...); if no lambda
        arrow follows, backtracks and parses a DISTINCT list or a plain
        expression/select, with optional ORDER BY/LIMIT suffixes."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # In named-argument position the LHS is a parameter name, not a column.
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints); if the parens
        actually contain a SELECT, backtrack and return `this` untouched."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # The probe parse is speculative: discard its errors and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, then constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... parameters."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY(...) with its
        optional START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parens hold a generation expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE LENGTH constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT constraint: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones are delegated directly."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint by dispatching on its leading keyword."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions,
        DEFERRABLE, MATCH FULL, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; `match=False` means the keyword was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()
3488 3489 if self._match(TokenType.L_PAREN, advance=False): 3490 expressions = self._parse_wrapped_id_vars() 3491 3492 options = self._parse_key_constraint_options() 3493 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3494 3495 def _parse_foreign_key(self) -> exp.ForeignKey: 3496 expressions = self._parse_wrapped_id_vars() 3497 reference = self._parse_references() 3498 options = {} 3499 3500 while self._match(TokenType.ON): 3501 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3502 self.raise_error("Expected DELETE or UPDATE") 3503 3504 kind = self._prev.text.lower() 3505 3506 if self._match_text_seq("NO", "ACTION"): 3507 action = "NO ACTION" 3508 elif self._match(TokenType.SET): 3509 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3510 action = "SET " + self._prev.text.upper() 3511 else: 3512 self._advance() 3513 action = self._prev.text.upper() 3514 3515 options[kind] = action 3516 3517 return self.expression( 3518 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3519 ) 3520 3521 def _parse_primary_key( 3522 self, wrapped_optional: bool = False, in_props: bool = False 3523 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3524 desc = ( 3525 self._match_set((TokenType.ASC, TokenType.DESC)) 3526 and self._prev.token_type == TokenType.DESC 3527 ) 3528 3529 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3530 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3531 3532 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3533 options = self._parse_key_constraint_options() 3534 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3535 3536 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3537 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3538 return this 3539 3540 bracket_kind = self._prev.token_type 3541 3542 
        if self._match(TokenType.COLON):
            # Leading colon: open-ended slice such as [:x] -- wrap the upper
            # bound in a Slice node with no lower bound.
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # No preceding expression (or an explicit ARRAY keyword) means this
            # is an array literal rather than a subscript access.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice node if a colon (slice separator) follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression into exp.Case.

        Covers both the simple form (CASE <operand> WHEN ...) and the searched
        form (CASE WHEN <condition> ...); the leading operand parse before the
        WHEN loop fills the `this` slot when an operand is present.
        """
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # CASE can be followed by an OVER clause, so try to parse a window.
        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) ->
t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            # Function style: IF(condition, true[, false])
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            # Statement style: IF <cond> THEN <expr> [ELSE <expr>] END;
            # back the cursor off entirely if no condition follows.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse the body of EXTRACT(<part> FROM <expr>), also tolerating a
        comma as the separator between part and expression."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(<expr> AS <type>).

        `strict` selects exp.Cast over exp.TryCast. Also handles the
        CAST(<expr>, '<type string>') form, a CHARACTER SET clause on CHAR,
        and a FORMAT clause on temporal types.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: the target type is given as a string.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(x AS DATE/TIME... FORMAT '...') becomes a string-to-date /
            # string-to-time conversion using the dialect's format mappings.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Coerce each argument so NULLs behave like empty strings.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style calls into exp.GroupConcat,
        normalizing the optional DISTINCT, ORDER BY and WITHIN GROUP parts."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: restore the cursor to before the ")" and emit a
            # plain GROUP_CONCAT-style call.
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a
        cast; `strict` selects exp.Cast over exp.TryCast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired argument (odd
        # count) is the default and is handled after the loop.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL = NULL as a match, so test with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may evaluate to NULL at runtime,
                # so match either on equality or on both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] <key> [:|VALUE] <value>` pair of a JSON_OBJECT call."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        # Optional NULL-handling clause: {NULL | ABSENT} ON NULL.
        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        # Optional uniqueness clause: WITH/WITHOUT UNIQUE [KEYS].
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif
self._match_text_seq("WITHOUT", "UNIQUE"): 3794 unique_keys = False 3795 3796 self._match_text_seq("KEYS") 3797 3798 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3799 format_json = self._match_text_seq("FORMAT", "JSON") 3800 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3801 3802 return self.expression( 3803 exp.JSONObject, 3804 expressions=expressions, 3805 null_handling=null_handling, 3806 unique_keys=unique_keys, 3807 return_type=return_type, 3808 format_json=format_json, 3809 encoding=encoding, 3810 ) 3811 3812 def _parse_logarithm(self) -> exp.Func: 3813 # Default argument order is base, expression 3814 args = self._parse_csv(self._parse_range) 3815 3816 if len(args) > 1: 3817 if not self.LOG_BASE_FIRST: 3818 args.reverse() 3819 return exp.Log.from_arg_list(args) 3820 3821 return self.expression( 3822 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3823 ) 3824 3825 def _parse_match_against(self) -> exp.MatchAgainst: 3826 expressions = self._parse_csv(self._parse_column) 3827 3828 self._match_text_seq(")", "AGAINST", "(") 3829 3830 this = self._parse_string() 3831 3832 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3833 modifier = "IN NATURAL LANGUAGE MODE" 3834 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3835 modifier = f"{modifier} WITH QUERY EXPANSION" 3836 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3837 modifier = "IN BOOLEAN MODE" 3838 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3839 modifier = "WITH QUERY EXPANSION" 3840 else: 3841 modifier = None 3842 3843 return self.expression( 3844 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3845 ) 3846 3847 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3848 def _parse_open_json(self) -> exp.OpenJSON: 3849 this = self._parse_bitwise() 3850 path = self._match(TokenType.COMMA) and self._parse_string() 3851 3852 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column definition of the WITH (...) clause:
            # <name> <type> [<json path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS-style arguments into exp.StrPosition.

        Supports both POSITION(needle IN haystack) and the comma-separated
        call form; `haystack_first` flips the argument order for dialects
        whose function takes the haystack before the needle.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a comma-separated table list as the arguments of a join hint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3908 3909 position = None 3910 collation = None 3911 3912 if self._match_texts(self.TRIM_TYPES): 3913 position = self._prev.text.upper() 3914 3915 expression = self._parse_bitwise() 3916 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3917 this = self._parse_bitwise() 3918 else: 3919 this = expression 3920 expression = None 3921 3922 if self._match(TokenType.COLLATE): 3923 collation = self._parse_bitwise() 3924 3925 return self.expression( 3926 exp.Trim, this=this, position=position, expression=expression, collation=collation 3927 ) 3928 3929 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3930 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3931 3932 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3933 return self._parse_window(self._parse_id_var(), alias=True) 3934 3935 def _parse_respect_or_ignore_nulls( 3936 self, this: t.Optional[exp.Expression] 3937 ) -> t.Optional[exp.Expression]: 3938 if self._match_text_seq("IGNORE", "NULLS"): 3939 return self.expression(exp.IgnoreNulls, this=this) 3940 if self._match_text_seq("RESPECT", "NULLS"): 3941 return self.expression(exp.RespectNulls, this=this) 3942 return this 3943 3944 def _parse_window( 3945 self, this: t.Optional[exp.Expression], alias: bool = False 3946 ) -> t.Optional[exp.Expression]: 3947 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3948 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3949 self._match_r_paren() 3950 3951 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3952 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3953 if self._match_text_seq("WITHIN", "GROUP"): 3954 order = self._parse_wrapped(self._parse_order) 3955 this = self.expression(exp.WithinGroup, this=this, expression=order) 3956 3957 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3958 # Some dialects choose to implement and some do not. 3959 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3960 3961 # There is some code above in _parse_lambda that handles 3962 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3963 3964 # The below changes handle 3965 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3966 3967 # Oracle allows both formats 3968 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3969 # and Snowflake chose to do the same for familiarity 3970 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3971 this = self._parse_respect_or_ignore_nulls(this) 3972 3973 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named-window definition path (WINDOW w AS (...)): no OVER keyword.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a previously named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: ROWS/RANGE BETWEEN <bound> AND <bound>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound into {"value": ..., "side": ...}."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or parenthesized alias list) for `this`;
        with `explicit=True` the AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or an identifier-like token, into exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # String tokens used as identifiers keep their quoted status.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal, reinterpreting it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
4104 ): 4105 return self.expression(exp.Var, this=self._prev.text) 4106 return self._parse_placeholder() 4107 4108 def _advance_any(self) -> t.Optional[Token]: 4109 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4110 self._advance() 4111 return self._prev 4112 return None 4113 4114 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4115 return self._parse_var() or self._parse_string() 4116 4117 def _parse_null(self) -> t.Optional[exp.Expression]: 4118 if self._match(TokenType.NULL): 4119 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4120 return None 4121 4122 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4123 if self._match(TokenType.TRUE): 4124 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4125 if self._match(TokenType.FALSE): 4126 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4127 return None 4128 4129 def _parse_star(self) -> t.Optional[exp.Expression]: 4130 if self._match(TokenType.STAR): 4131 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4132 return None 4133 4134 def _parse_parameter(self) -> exp.Parameter: 4135 wrapped = self._match(TokenType.L_BRACE) 4136 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4137 self._match(TokenType.R_BRACE) 4138 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4139 4140 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4141 if self._match_set(self.PLACEHOLDER_PARSERS): 4142 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4143 if placeholder: 4144 return placeholder 4145 self._advance(-1) 4146 return None 4147 4148 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4149 if not self._match(TokenType.EXCEPT): 4150 return None 4151 if self._match(TokenType.L_PAREN, advance=False): 4152 return self._parse_wrapped_csv(self._parse_column) 4153 return self._parse_csv(self._parse_column) 4154 4155 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4156 if not self._match(TokenType.REPLACE): 4157 return None 4158 if self._match(TokenType.L_PAREN, advance=False): 4159 return self._parse_wrapped_csv(self._parse_expression) 4160 return self._parse_csv(self._parse_expression) 4161 4162 def _parse_csv( 4163 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4164 ) -> t.List[t.Optional[exp.Expression]]: 4165 parse_result = parse_method() 4166 items = [parse_result] if parse_result is not None else [] 4167 4168 while self._match(sep): 4169 self._add_comments(parse_result) 4170 parse_result = parse_method() 4171 if parse_result is not None: 4172 items.append(parse_result) 4173 4174 return items 4175 4176 def _parse_tokens( 4177 self, parse_method: t.Callable, expressions: t.Dict 4178 ) -> t.Optional[exp.Expression]: 4179 this = parse_method() 4180 4181 while self._match_set(expressions): 4182 this = self.expression( 4183 expressions[self._prev.token_type], 4184 this=this, 4185 comments=self._prev_comments, 4186 expression=parse_method(), 4187 ) 4188 4189 return this 4190 4191 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4192 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4193 4194 def _parse_wrapped_csv( 4195 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4196 ) -> t.List[t.Optional[exp.Expression]]: 4197 return self._parse_wrapped( 4198 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4199 ) 4200 4201 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4202 wrapped = self._match(TokenType.L_PAREN) 4203 if not wrapped and not optional: 4204 self.raise_error("Expecting (") 4205 parse_result = parse_method() 4206 if wrapped: 4207 self._match_r_paren() 4208 return parse_result 4209 4210 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4211 return 
self._parse_select() or self._parse_set_operations( 4212 self._parse_expression() if alias else self._parse_conjunction() 4213 ) 4214 4215 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4216 return self._parse_query_modifiers( 4217 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4218 ) 4219 4220 def _parse_transaction(self) -> exp.Transaction: 4221 this = None 4222 if self._match_texts(self.TRANSACTION_KIND): 4223 this = self._prev.text 4224 4225 self._match_texts({"TRANSACTION", "WORK"}) 4226 4227 modes = [] 4228 while True: 4229 mode = [] 4230 while self._match(TokenType.VAR): 4231 mode.append(self._prev.text) 4232 4233 if mode: 4234 modes.append(" ".join(mode)) 4235 if not self._match(TokenType.COMMA): 4236 break 4237 4238 return self.expression(exp.Transaction, this=this, modes=modes) 4239 4240 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4241 chain = None 4242 savepoint = None 4243 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4244 4245 self._match_texts({"TRANSACTION", "WORK"}) 4246 4247 if self._match_text_seq("TO"): 4248 self._match_text_seq("SAVEPOINT") 4249 savepoint = self._parse_id_var() 4250 4251 if self._match(TokenType.AND): 4252 chain = not self._match_text_seq("NO") 4253 self._match_text_seq("CHAIN") 4254 4255 if is_rollback: 4256 return self.expression(exp.Rollback, savepoint=savepoint) 4257 4258 return self.expression(exp.Commit, chain=chain) 4259 4260 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4261 if not self._match_text_seq("ADD"): 4262 return None 4263 4264 self._match(TokenType.COLUMN) 4265 exists_column = self._parse_exists(not_=True) 4266 expression = self._parse_column_def(self._parse_field(any_token=True)) 4267 4268 if expression: 4269 expression.set("exists", exists_column) 4270 4271 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4272 if self._match_texts(("FIRST", "AFTER")): 4273 position 
= self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action inside ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / ADD FOREIGN KEY / ADD PRIMARY KEY action.

        The token that triggered this parser has already been consumed; its
        type is read back from `self._prev` to decide which form this is.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: ADD CONSTRAINT <name> ...
            this = self._parse_id_var()

        if self._match_text_seq("CHECK"):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action list of ALTER TABLE: either constraints or
        column definitions, retrying as columns if no constraint token follows."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        # --- tail of _parse_alter_table_alter (its `def` line precedes this chunk) ---
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        # ALTER COLUMN ... DROP DEFAULT / SET DEFAULT <expr>
        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Otherwise: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses the action list of an ALTER TABLE ... DROP statement.

        Handles DROP PARTITION (possibly with IF EXISTS) and falls back to
        dropping a comma-separated list of columns.
        """
        # Remember the position just before DROP so we can rewind for the
        # column-drop fallback.
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parses ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parses an ALTER statement.

        Only ALTER TABLE with a registered action parser (ALTER_PARSERS) that
        consumes all remaining tokens yields a structured AlterTable; anything
        else degrades to a raw exp.Command.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The token just consumed (e.g. ADD/DROP/ALTER/RENAME) selects the action parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Leftover tokens mean we didn't fully understand the statement;
            # fall through to the Command fallback below.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parses MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, and False when
            # neither qualifier is present (_match_text_seq returns False then).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (exprs)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parses a SHOW statement, dispatching via SHOW_PARSERS/SHOW_TRIE.

        Falls back to a bare exp.Show holding the next token's uppercased text.
        """
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parses a single SET item of the form `<name> = <value>` or `<name> TO <value>`.

        Returns None (after rewinding) when no assignment operator is found.
        """
        index = self._index

        # GLOBAL/SESSION TRANSACTION has its own dedicated parser.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parses SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, hence the dict-splat workaround.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parses one SET item, preferring a registered parser over plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Set | exp.Command:
        """Parses a SET statement; degrades to exp.Command if tokens remain."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Matches one of the given (possibly multi-word) options, returning it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consumes all remaining tokens and wraps the raw SQL in exp.Command.

        The command keyword (the text of `start`) becomes `this`; the rest of
        the statement becomes `expression`.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parses a dictionary property clause, e.g. `NAME(KIND(key value ...))`.

        Used for ClickHouse-style dictionary definitions — TODO confirm dialect.
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value could be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parses `(MIN <v> MAX <v>)` or `(MAX <v>)`; MIN defaults to 0 when absent."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walks the token stream against `trie` to find a registered parser.

        Returns the matching callable from `parsers`, or None (rewinding the
        stream) when no full key matches. in_trie yields 0 = no match,
        2 = full-key match (1 = prefix, keep consuming).
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Returns True and (optionally) advances if the current token matches.

        Returns None — not False — on a miss, so callers rely only on truthiness.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Attach any pending comments to the given expression, if provided.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like _match, but accepts any token type in the `types` collection."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Matches two consecutive token types, advancing past both on success."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Requires a '(' token; raises/records a parse error if missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Requires a ')' token; raises/records a parse error if missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Matches if the current token's uppercased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Matches a sequence of uppercased token texts; rewinds fully on any miss."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        # Peek-only mode: confirm the match but leave the stream untouched.
        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
4647 4648 def _replace_columns_with_dots(self, this): 4649 if isinstance(this, exp.Dot): 4650 exp.replace_children(this, self._replace_columns_with_dots) 4651 elif isinstance(this, exp.Column): 4652 exp.replace_children(this, self._replace_columns_with_dots) 4653 table = this.args.get("table") 4654 this = ( 4655 self.expression(exp.Dot, this=table, expression=this.this) 4656 if table 4657 else self.expression(exp.Var, this=this.name) 4658 ) 4659 elif isinstance(this, exp.Identifier): 4660 this = self.expression(exp.Var, this=this.name) 4661 4662 return this 4663 4664 def _replace_lambda( 4665 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4666 ) -> t.Optional[exp.Expression]: 4667 if not node: 4668 return node 4669 4670 for column in node.find_all(exp.Column): 4671 if column.parts[0].name in lambda_variables: 4672 dot_or_id = column.to_dot() if column.table else column.this 4673 parent = column.parent 4674 4675 while isinstance(parent, exp.Dot): 4676 if not isinstance(parent.parent, exp.Dot): 4677 parent.replace(dot_or_id) 4678 break 4679 parent = parent.parent 4680 else: 4681 if column is node: 4682 node = dot_or_id 4683 else: 4684 column.replace(dot_or_id) 4685 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
830 def __init__( 831 self, 832 error_level: t.Optional[ErrorLevel] = None, 833 error_message_context: int = 100, 834 max_errors: int = 3, 835 ): 836 self.error_level = error_level or ErrorLevel.IMMEDIATE 837 self.error_message_context = error_message_context 838 self.max_errors = max_errors 839 self.reset()
851 def parse( 852 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 853 ) -> t.List[t.Optional[exp.Expression]]: 854 """ 855 Parses a list of tokens and returns a list of syntax trees, one tree 856 per parsed SQL statement. 857 858 Args: 859 raw_tokens: The list of tokens. 860 sql: The original SQL string, used to produce helpful debug messages. 861 862 Returns: 863 The list of the produced syntax trees. 864 """ 865 return self._parse( 866 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 867 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
869 def parse_into( 870 self, 871 expression_types: exp.IntoType, 872 raw_tokens: t.List[Token], 873 sql: t.Optional[str] = None, 874 ) -> t.List[t.Optional[exp.Expression]]: 875 """ 876 Parses a list of tokens into a given Expression type. If a collection of Expression 877 types is given instead, this method will try to parse the token list into each one 878 of them, stopping at the first for which the parsing succeeds. 879 880 Args: 881 expression_types: The expression type(s) to try and parse the token list into. 882 raw_tokens: The list of tokens. 883 sql: The original SQL string, used to produce helpful debug messages. 884 885 Returns: 886 The target Expression. 887 """ 888 errors = [] 889 for expression_type in ensure_list(expression_types): 890 parser = self.EXPRESSION_PARSERS.get(expression_type) 891 if not parser: 892 raise TypeError(f"No parser registered for {expression_type}") 893 894 try: 895 return self._parse(parser, raw_tokens, sql) 896 except ParseError as e: 897 e.errors[0]["into_expression"] = expression_type 898 errors.append(e) 899 900 raise ParseError( 901 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 902 errors=merge_errors(errors), 903 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
940 def check_errors(self) -> None: 941 """Logs or raises any found errors, depending on the chosen error level setting.""" 942 if self.error_level == ErrorLevel.WARN: 943 for error in self.errors: 944 logger.error(str(error)) 945 elif self.error_level == ErrorLevel.RAISE and self.errors: 946 raise ParseError( 947 concat_messages(self.errors, self.max_errors), 948 errors=merge_errors(self.errors), 949 )
Logs or raises any found errors, depending on the chosen error level setting.
951 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 952 """ 953 Appends an error in the list of recorded errors or raises it, depending on the chosen 954 error level setting. 955 """ 956 token = token or self._curr or self._prev or Token.string("") 957 start = token.start 958 end = token.end + 1 959 start_context = self.sql[max(start - self.error_message_context, 0) : start] 960 highlight = self.sql[start:end] 961 end_context = self.sql[end : end + self.error_message_context] 962 963 error = ParseError.new( 964 f"{message}. Line {token.line}, Col: {token.col}.\n" 965 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 966 description=message, 967 line=token.line, 968 col=token.col, 969 start_context=start_context, 970 highlight=highlight, 971 end_context=end_context, 972 ) 973 974 if self.error_level == ErrorLevel.IMMEDIATE: 975 raise error 976 977 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
979 def expression( 980 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 981 ) -> E: 982 """ 983 Creates a new, validated Expression. 984 985 Args: 986 exp_class: The expression class to instantiate. 987 comments: An optional list of comments to attach to the expression. 988 kwargs: The arguments to set for the expression along with their respective values. 989 990 Returns: 991 The target expression. 992 """ 993 instance = exp_class(**kwargs) 994 instance.add_comments(comments) if comments else self._add_comments(instance) 995 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1002 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1003 """ 1004 Validates an Expression, making sure that all its mandatory arguments are set. 1005 1006 Args: 1007 expression: The expression to validate. 1008 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1009 1010 Returns: 1011 The validated expression. 1012 """ 1013 if self.error_level != ErrorLevel.IGNORE: 1014 for error_message in expression.error_messages(args): 1015 self.raise_error(error_message) 1016 1017 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.