sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.RLIKE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 TokenType.XOR, 343 *TYPE_TOKENS, 344 *SUBQUERY_PREDICATES, 345 } 346 347 CONJUNCTION = { 348 TokenType.AND: exp.And, 349 TokenType.OR: exp.Or, 350 } 351 352 EQUALITY = { 353 TokenType.EQ: exp.EQ, 354 TokenType.NEQ: exp.NEQ, 355 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 356 } 357 358 COMPARISON = { 359 TokenType.GT: exp.GT, 360 TokenType.GTE: exp.GTE, 361 TokenType.LT: exp.LT, 362 TokenType.LTE: exp.LTE, 363 } 364 365 BITWISE = { 366 TokenType.AMP: exp.BitwiseAnd, 367 TokenType.CARET: exp.BitwiseXor, 368 TokenType.PIPE: exp.BitwiseOr, 369 TokenType.DPIPE: exp.DPipe, 370 } 371 372 TERM = { 373 TokenType.DASH: exp.Sub, 374 TokenType.PLUS: exp.Add, 375 TokenType.MOD: exp.Mod, 376 TokenType.COLLATE: exp.Collate, 377 } 378 379 FACTOR = { 380 TokenType.DIV: exp.IntDiv, 381 TokenType.LR_ARROW: exp.Distance, 382 TokenType.SLASH: exp.Div, 383 TokenType.STAR: exp.Mul, 384 } 385 386 TIMESTAMPS = { 387 TokenType.TIME, 388 TokenType.TIMESTAMP, 389 TokenType.TIMESTAMPTZ, 390 TokenType.TIMESTAMPLTZ, 391 } 392 393 SET_OPERATIONS = { 394 TokenType.UNION, 395 TokenType.INTERSECT, 396 TokenType.EXCEPT, 397 } 398 399 JOIN_METHODS = { 400 TokenType.NATURAL, 401 TokenType.ASOF, 402 } 403 404 JOIN_SIDES = { 405 TokenType.LEFT, 406 TokenType.RIGHT, 407 TokenType.FULL, 408 } 409 410 JOIN_KINDS = { 411 TokenType.INNER, 412 TokenType.OUTER, 413 TokenType.CROSS, 414 TokenType.SEMI, 415 TokenType.ANTI, 416 } 417 418 JOIN_HINTS: t.Set[str] = set() 419 420 LAMBDAS = { 421 TokenType.ARROW: lambda self, expressions: self.expression( 422 exp.Lambda, 423 this=self._replace_lambda( 424 self._parse_conjunction(), 425 {node.name for node in expressions}, 426 ), 427 expressions=expressions, 428 ), 429 TokenType.FARROW: lambda self, expressions: self.expression( 430 exp.Kwarg, 431 this=exp.var(expressions[0].name), 432 expression=self._parse_conjunction(), 433 ), 434 } 435 436 COLUMN_OPERATORS = { 437 TokenType.DOT: None, 438 TokenType.DCOLON: lambda self, this, to: self.expression( 439 exp.Cast if self.STRICT_CAST else exp.TryCast, 440 this=this, 441 to=to, 442 ), 443 TokenType.ARROW: lambda self, this, path: self.expression( 444 exp.JSONExtract, 445 this=this, 446 expression=path, 447 ), 448 TokenType.DARROW: lambda self, this, path: self.expression( 449 exp.JSONExtractScalar, 450 this=this, 451 expression=path, 452 ), 453 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 454 exp.JSONBExtract, 455 this=this, 456 expression=path, 457 ), 458 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 459 exp.JSONBExtractScalar, 460 this=this, 461 expression=path, 462 ), 463 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 464 exp.JSONBContains, 465 this=this, 466 expression=key, 467 ), 468 } 469 470 EXPRESSION_PARSERS = { 471 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 472 exp.Column: lambda self: self._parse_column(), 473 exp.Condition: lambda self: self._parse_conjunction(), 474 exp.DataType: lambda self: self._parse_types(), 475 exp.Expression: lambda self: self._parse_statement(), 476 exp.From: lambda self: self._parse_from(), 477 exp.Group: lambda self: self._parse_group(), 478 exp.Having: lambda self: self._parse_having(), 479 exp.Identifier: lambda self: self._parse_id_var(), 480 exp.Join: lambda self: self._parse_join(), 481 exp.Lambda: lambda self: self._parse_lambda(), 482 exp.Lateral: lambda self: self._parse_lateral(), 483 exp.Limit: lambda self: self._parse_limit(), 484 exp.Offset: lambda self: self._parse_offset(), 485 exp.Order: lambda self: self._parse_order(), 486 exp.Ordered: lambda self: self._parse_ordered(), 487 exp.Properties: lambda self: self._parse_properties(), 488 exp.Qualify: lambda self: self._parse_qualify(), 489 exp.Returning: lambda self: self._parse_returning(), 490 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 491 exp.Table: lambda self: self._parse_table_parts(), 492 exp.TableAlias: lambda self: self._parse_table_alias(), 493 exp.Where: lambda self: self._parse_where(), 494 exp.Window: lambda self: self._parse_named_window(), 495 exp.With: lambda self: self._parse_with(), 496 "JOIN_TYPE": lambda self: self._parse_join_parts(), 497 } 498 499 STATEMENT_PARSERS = { 500 TokenType.ALTER: lambda self: self._parse_alter(), 501 TokenType.BEGIN: lambda self: self._parse_transaction(), 502 TokenType.CACHE: lambda self: self._parse_cache(), 503 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 504 TokenType.COMMENT: lambda self: self._parse_comment(), 505 TokenType.CREATE: lambda self: self._parse_create(), 506 TokenType.DELETE: lambda self: self._parse_delete(), 507 TokenType.DESC: lambda self: self._parse_describe(), 508 TokenType.DESCRIBE: lambda self: self._parse_describe(), 509 TokenType.DROP: lambda self: self._parse_drop(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 742 TokenType.WHERE: lambda self: ("where", self._parse_where()), 743 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 744 TokenType.HAVING: lambda self: ("having", self._parse_having()), 745 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 746 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 747 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 748 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 749 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 750 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 751 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 752 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 753 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.CLUSTER_BY: lambda self: ( 756 "cluster", 757 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 ), 759 TokenType.DISTRIBUTE_BY: lambda self: ( 760 "distribute", 761 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 762 ), 763 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 764 } 765 766 SET_PARSERS = { 767 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 768 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 769 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 770 "TRANSACTION": lambda self: self._parse_set_transaction(), 771 } 772 773 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 774 775 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 776 777 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 778 779 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 780 781 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 782 783 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 784 TRANSACTION_CHARACTERISTICS = { 785 "ISOLATION LEVEL REPEATABLE READ", 786 "ISOLATION LEVEL READ COMMITTED", 787 "ISOLATION LEVEL READ UNCOMMITTED", 788 "ISOLATION LEVEL SERIALIZABLE", 789 "READ WRITE", 790 "READ ONLY", 791 } 792 793 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 794 795 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 796 797 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 798 799 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 800 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 801 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 802 803 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 804 805 STRICT_CAST = True 806 807 # A NULL arg in CONCAT yields NULL by default 808 CONCAT_NULL_OUTPUTS_STRING = False 809 810 PREFIXED_PIVOT_COLUMNS = False 811 IDENTIFY_PIVOT_STRINGS = False 812 813 LOG_BASE_FIRST = True 814 LOG_DEFAULTS_TO_LN = False 815 816 __slots__ = ( 817 "error_level", 818 "error_message_context", 819 "max_errors", 820 "sql", 821 "errors", 822 "_tokens", 823 "_index", 824 "_curr", 825 "_next", 826 "_prev", 827 "_prev_comments", 828 ) 829 830 # Autofilled 831 INDEX_OFFSET: int = 0 832 UNNEST_COLUMN_ONLY: bool = False 833 ALIAS_POST_TABLESAMPLE: bool = False 834 STRICT_STRING_CONCAT = False 835 NULL_ORDERING: str = "nulls_are_small" 836 SHOW_TRIE: t.Dict = {} 837 SET_TRIE: t.Dict = {} 838 FORMAT_MAPPING: t.Dict[str, str] = {} 839 FORMAT_TRIE: t.Dict = {} 840 TIME_MAPPING: t.Dict[str, str] = {} 841 TIME_TRIE: t.Dict = {} 842 843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset() 853 854 def reset(self): 855 self.sql = "" 856 self.errors = [] 857 self._tokens = [] 858 self._index = 0 859 self._curr = None 860 self._next = None 861 self._prev = None 862 self._prev_comments = None 863 864 def parse( 865 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 ) 881 882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1] 917 918 def _parse( 919 self, 920 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 921 raw_tokens: t.List[Token], 922 sql: t.Optional[str] = None, 923 ) -> t.List[t.Optional[exp.Expression]]: 924 self.reset() 925 self.sql = sql or "" 926 927 total = len(raw_tokens) 928 chunks: t.List[t.List[Token]] = [[]] 929 930 for i, token in enumerate(raw_tokens): 931 if token.token_type == TokenType.SEMICOLON: 932 if i < total - 1: 933 chunks.append([]) 934 else: 935 chunks[-1].append(token) 936 937 expressions = [] 938 939 for tokens in chunks: 940 self._index = -1 941 self._tokens = tokens 942 self._advance() 943 944 expressions.append(parse_method(self)) 945 946 if self._index < len(self._tokens): 947 self.raise_error("Invalid expression / Unexpected token") 948 949 self.check_errors() 950 951 return expressions 952 953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 ) 963 964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error) 991 992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression 1031 1032 def _find_sql(self, start: Token, end: Token) -> str: 1033 return self.sql[start.start : end.end + 1] 1034 1035 def _advance(self, times: int = 1) -> None: 1036 self._index += times 1037 self._curr = seq_get(self._tokens, self._index) 1038 self._next = seq_get(self._tokens, self._index + 1) 1039 1040 if self._index > 0: 1041 self._prev = self._tokens[self._index - 1] 1042 self._prev_comments = self._prev.comments 1043 else: 1044 self._prev = None 1045 self._prev_comments = None 1046 1047 def _retreat(self, index: int) -> None: 1048 if index != self._index: 1049 self._advance(index - self._index) 1050 1051 def _parse_command(self) -> exp.Command: 1052 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1053 1054 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1055 start = self._prev 1056 exists = self._parse_exists() if allow_exists else None 1057 1058 self._match(TokenType.ON) 1059 1060 kind = self._match_set(self.CREATABLES) and self._prev 1061 if not kind: 1062 return self._parse_as_command(start) 1063 1064 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1065 this = self._parse_user_defined_function(kind=kind.token_type) 1066 elif kind.token_type == TokenType.TABLE: 1067 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1068 elif kind.token_type == TokenType.COLUMN: 1069 this = self._parse_column() 1070 else: 1071 this = self._parse_id_var() 1072 1073 self._match(TokenType.IS) 1074 1075 return self.expression( 1076 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1077 ) 1078 1079 def _parse_to_table( 1080 self, 1081 ) -> exp.ToTableProperty: 1082 table = self._parse_table_parts(schema=True) 1083 return self.expression(exp.ToTableProperty, this=table) 1084 1085 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1086 def _parse_ttl(self) -> exp.Expression: 1087 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1088 this = self._parse_bitwise() 1089 1090 if self._match_text_seq("DELETE"): 1091 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1092 if self._match_text_seq("RECOMPRESS"): 1093 return self.expression( 1094 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1095 ) 1096 if self._match_text_seq("TO", "DISK"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1099 ) 1100 if self._match_text_seq("TO", "VOLUME"): 1101 return self.expression( 1102 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1103 ) 1104 1105 return this 1106 1107 expressions = self._parse_csv(_parse_ttl_action) 1108 where = self._parse_where() 1109 group = self._parse_group() 1110 1111 aggregates = None 1112 if group and self._match(TokenType.SET): 1113 aggregates = self._parse_csv(self._parse_set_item) 1114 1115 return self.expression( 1116 exp.MergeTreeTTL, 1117 expressions=expressions, 1118 where=where, 1119 group=group, 1120 aggregates=aggregates, 1121 ) 1122 1123 def _parse_statement(self) -> t.Optional[exp.Expression]: 1124 if self._curr is None: 1125 return None 1126 1127 if self._match_set(self.STATEMENT_PARSERS): 1128 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1129 1130 if self._match_set(Tokenizer.COMMANDS): 1131 return self._parse_command() 1132 1133 expression = self._parse_expression() 1134 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1135 return self._parse_query_modifiers(expression) 1136 1137 def _parse_drop(self) -> exp.Drop | exp.Command: 1138 start = self._prev 1139 temporary = self._match(TokenType.TEMPORARY) 1140 materialized = self._match_text_seq("MATERIALIZED") 1141 1142 kind = self._match_set(self.CREATABLES) and self._prev.text 1143 if not kind: 1144 return self._parse_as_command(start) 1145 1146 return self.expression( 1147 exp.Drop, 1148 comments=start.comments, 1149 exists=self._parse_exists(), 1150 this=self._parse_table(schema=True), 1151 kind=kind, 1152 temporary=temporary, 1153 materialized=materialized, 1154 cascade=self._match_text_seq("CASCADE"), 1155 constraints=self._match_text_seq("CONSTRAINTS"), 1156 purge=self._match_text_seq("PURGE"), 1157 ) 1158 1159 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1160 return ( 1161 self._match(TokenType.IF) 1162 and (not not_ or self._match(TokenType.NOT)) 1163 and self._match(TokenType.EXISTS) 1164 ) 1165 1166 def _parse_create(self) -> exp.Create | exp.Command: 1167 # Note: this can't be None because we've matched a statement parser 1168 start = self._prev 1169 replace = start.text.upper() == "REPLACE" or self._match_pair( 1170 TokenType.OR, TokenType.REPLACE 1171 ) 1172 unique = self._match(TokenType.UNIQUE) 1173 1174 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1175 self._advance() 1176 1177 properties = None 1178 create_token = self._match_set(self.CREATABLES) and self._prev 1179 1180 if not create_token: 1181 # exp.Properties.Location.POST_CREATE 1182 properties = self._parse_properties() 1183 create_token = self._match_set(self.CREATABLES) and self._prev 1184 1185 if not properties or not create_token: 1186 return self._parse_as_command(start) 1187 1188 exists = self._parse_exists(not_=True) 1189 this = None 1190 expression = None 1191 indexes = None 1192 no_schema_binding = None 1193 begin = None 1194 clone = None 1195 1196 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1197 nonlocal properties 1198 if properties and temp_props: 1199 properties.expressions.extend(temp_props.expressions) 1200 elif temp_props: 1201 properties = temp_props 1202 1203 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1204 this = self._parse_user_defined_function(kind=create_token.token_type) 1205 1206 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1207 extend_props(self._parse_properties()) 1208 1209 self._match(TokenType.ALIAS) 1210 begin = self._match(TokenType.BEGIN) 1211 return_ = self._match_text_seq("RETURN") 1212 expression = self._parse_statement() 1213 1214 if return_: 1215 expression = self.expression(exp.Return, this=expression) 1216 elif create_token.token_type == TokenType.INDEX: 1217 this = self._parse_index(index=self._parse_id_var()) 1218 elif create_token.token_type in self.DB_CREATABLES: 1219 table_parts = self._parse_table_parts(schema=True) 1220 1221 # exp.Properties.Location.POST_NAME 1222 self._match(TokenType.COMMA) 1223 extend_props(self._parse_properties(before=True)) 1224 1225 this = self._parse_schema(this=table_parts) 1226 1227 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1228 extend_props(self._parse_properties()) 1229 1230 self._match(TokenType.ALIAS) 1231 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1232 # exp.Properties.Location.POST_ALIAS 1233 extend_props(self._parse_properties()) 1234 1235 expression = self._parse_ddl_select() 1236 1237 if create_token.token_type == TokenType.TABLE: 1238 # exp.Properties.Location.POST_EXPRESSION 1239 extend_props(self._parse_properties()) 1240 1241 indexes = [] 1242 while True: 1243 index = self._parse_index() 1244 1245 # exp.Properties.Location.POST_INDEX 1246 extend_props(self._parse_properties()) 1247 1248 if not index: 1249 break 1250 else: 1251 self._match(TokenType.COMMA) 1252 indexes.append(index) 1253 elif create_token.token_type == TokenType.VIEW: 1254 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1255 no_schema_binding = True 1256 1257 if self._match_text_seq("CLONE"): 1258 clone = self._parse_table(schema=True) 1259 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1260 clone_kind = ( 1261 self._match(TokenType.L_PAREN) 1262 and self._match_texts(self.CLONE_KINDS) 1263 and self._prev.text.upper() 1264 ) 1265 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1266 self._match(TokenType.R_PAREN) 1267 clone = self.expression( 1268 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1269 ) 1270 1271 return self.expression( 1272 exp.Create, 1273 this=this, 1274 kind=create_token.text, 1275 replace=replace, 1276 unique=unique, 1277 expression=expression, 1278 exists=exists, 1279 properties=properties, 1280 indexes=indexes, 1281 no_schema_binding=no_schema_binding, 1282 begin=begin, 1283 clone=clone, 1284 ) 1285 1286 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1287 # only used for teradata currently 1288 self._match(TokenType.COMMA) 1289 1290 kwargs = { 1291 "no": self._match_text_seq("NO"), 1292 "dual": self._match_text_seq("DUAL"), 1293 "before": self._match_text_seq("BEFORE"), 1294 "default": self._match_text_seq("DEFAULT"), 1295 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1296 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1297 "after": self._match_text_seq("AFTER"), 1298 "minimum": self._match_texts(("MIN", "MINIMUM")), 1299 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1300 } 1301 1302 if self._match_texts(self.PROPERTY_PARSERS): 1303 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1304 try: 1305 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1306 except TypeError: 1307 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1308 1309 return None 1310 1311 def _parse_property(self) -> t.Optional[exp.Expression]: 1312 if self._match_texts(self.PROPERTY_PARSERS): 1313 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1314 1315 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1316 return self._parse_character_set(default=True) 1317 1318 if self._match_text_seq("COMPOUND", "SORTKEY"): 1319 return self._parse_sortkey(compound=True) 1320 1321 if self._match_text_seq("SQL", "SECURITY"): 1322 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1323 1324 assignment = self._match_pair( 1325 TokenType.VAR, TokenType.EQ, advance=False 1326 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1327 1328 if assignment: 1329 key = self._parse_var_or_string() 1330 self._match(TokenType.EQ) 1331 return self.expression(exp.Property, this=key, value=self._parse_column()) 1332 1333 return None 1334 1335 def _parse_stored(self) -> exp.FileFormatProperty: 1336 self._match(TokenType.ALIAS) 1337 1338 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1339 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1340 1341 return self.expression( 1342 exp.FileFormatProperty, 1343 this=self.expression( 1344 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1345 ) 1346 if input_format or output_format 1347 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1348 ) 1349 1350 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1351 self._match(TokenType.EQ) 1352 self._match(TokenType.ALIAS) 1353 return self.expression(exp_class, this=self._parse_field()) 1354 1355 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1356 properties = [] 1357 while True: 1358 if before: 1359 prop = self._parse_property_before() 1360 else: 1361 prop = self._parse_property() 1362 1363 if not prop: 1364 break 1365 for p in ensure_list(prop): 1366 properties.append(p) 1367 1368 if properties: 1369 return self.expression(exp.Properties, expressions=properties) 1370 1371 return None 1372 1373 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1374 return self.expression( 1375 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1376 ) 1377 1378 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1379 if self._index >= 2: 1380 pre_volatile_token = self._tokens[self._index - 2] 1381 else: 1382 pre_volatile_token = None 1383 1384 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1385 return exp.VolatileProperty() 1386 1387 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1388 1389 def _parse_with_property( 1390 self, 1391 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1392 if self._match(TokenType.L_PAREN, advance=False): 1393 return self._parse_wrapped_csv(self._parse_property) 1394 1395 if self._match_text_seq("JOURNAL"): 1396 return self._parse_withjournaltable() 1397 1398 if self._match_text_seq("DATA"): 1399 return self._parse_withdata(no=False) 1400 elif self._match_text_seq("NO", "DATA"): 1401 return self._parse_withdata(no=True) 1402 1403 if not self._next: 1404 return None 1405 1406 return self._parse_withisolatedloading() 1407 1408 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1409 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1410 self._match(TokenType.EQ) 1411 1412 user = self._parse_id_var() 1413 self._match(TokenType.PARAMETER) 1414 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1415 1416 if not user or not host: 1417 return None 1418 1419 return exp.DefinerProperty(this=f"{user}@{host}") 1420 1421 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1422 self._match(TokenType.TABLE) 1423 self._match(TokenType.EQ) 1424 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1425 1426 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1427 return self.expression(exp.LogProperty, no=no) 1428 1429 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1430 return self.expression(exp.JournalProperty, **kwargs) 1431 1432 def _parse_checksum(self) -> exp.ChecksumProperty: 1433 self._match(TokenType.EQ) 1434 1435 on = None 1436 if self._match(TokenType.ON): 1437 on = True 1438 elif self._match_text_seq("OFF"): 1439 on = False 1440 1441 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1442 1443 def _parse_cluster(self) -> exp.Cluster: 1444 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1445 1446 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1447 self._match_text_seq("BY") 1448 1449 self._match_l_paren() 1450 expressions = self._parse_csv(self._parse_column) 1451 self._match_r_paren() 1452 1453 if self._match_text_seq("SORTED", "BY"): 1454 self._match_l_paren() 1455 sorted_by = self._parse_csv(self._parse_ordered) 1456 self._match_r_paren() 1457 else: 1458 sorted_by = None 1459 1460 self._match(TokenType.INTO) 1461 buckets = self._parse_number() 1462 self._match_text_seq("BUCKETS") 1463 1464 return self.expression( 1465 exp.ClusteredByProperty, 1466 expressions=expressions, 1467 sorted_by=sorted_by, 1468 buckets=buckets, 1469 ) 1470 1471 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1472 if not self._match_text_seq("GRANTS"): 1473 self._retreat(self._index - 1) 1474 return None 1475 1476 return self.expression(exp.CopyGrantsProperty) 1477 1478 def _parse_freespace(self) -> exp.FreespaceProperty: 1479 self._match(TokenType.EQ) 1480 return self.expression( 1481 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1482 ) 1483 1484 def _parse_mergeblockratio( 1485 self, no: bool = False, default: bool = False 1486 ) -> exp.MergeBlockRatioProperty: 1487 if self._match(TokenType.EQ): 1488 return self.expression( 1489 exp.MergeBlockRatioProperty, 1490 this=self._parse_number(), 1491 percent=self._match(TokenType.PERCENT), 1492 ) 1493 1494 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1495 1496 def _parse_datablocksize( 1497 self, 1498 default: t.Optional[bool] = None, 1499 minimum: t.Optional[bool] = None, 1500 maximum: t.Optional[bool] = None, 1501 ) -> exp.DataBlocksizeProperty: 1502 self._match(TokenType.EQ) 1503 size = self._parse_number() 1504 1505 units = None 1506 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1507 units = self._prev.text 1508 1509 return self.expression( 1510 exp.DataBlocksizeProperty, 1511 size=size, 1512 units=units, 1513 default=default, 1514 minimum=minimum, 1515 maximum=maximum, 1516 ) 1517 1518 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1519 self._match(TokenType.EQ) 1520 always = self._match_text_seq("ALWAYS") 1521 manual = self._match_text_seq("MANUAL") 1522 never = self._match_text_seq("NEVER") 1523 default = self._match_text_seq("DEFAULT") 1524 1525 autotemp = None 1526 if self._match_text_seq("AUTOTEMP"): 1527 autotemp = self._parse_schema() 1528 1529 return self.expression( 1530 exp.BlockCompressionProperty, 1531 always=always, 1532 manual=manual, 1533 never=never, 1534 default=default, 1535 autotemp=autotemp, 1536 ) 1537 1538 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1539 no = self._match_text_seq("NO") 1540 concurrent = self._match_text_seq("CONCURRENT") 1541 self._match_text_seq("ISOLATED", "LOADING") 1542 for_all = self._match_text_seq("FOR", "ALL") 1543 for_insert = self._match_text_seq("FOR", "INSERT") 1544 for_none = self._match_text_seq("FOR", "NONE") 1545 return self.expression( 1546 exp.IsolatedLoadingProperty, 1547 no=no, 1548 concurrent=concurrent, 1549 for_all=for_all, 1550 for_insert=for_insert, 1551 for_none=for_none, 1552 ) 1553 1554 def _parse_locking(self) -> exp.LockingProperty: 1555 if self._match(TokenType.TABLE): 1556 kind = "TABLE" 1557 elif self._match(TokenType.VIEW): 1558 kind = "VIEW" 1559 elif self._match(TokenType.ROW): 1560 kind = "ROW" 1561 elif self._match_text_seq("DATABASE"): 1562 kind = "DATABASE" 1563 else: 1564 kind = None 1565 1566 if kind in ("DATABASE", "TABLE", "VIEW"): 1567 this = self._parse_table_parts() 1568 else: 1569 this = None 1570 1571 if self._match(TokenType.FOR): 1572 for_or_in = "FOR" 1573 elif self._match(TokenType.IN): 1574 for_or_in = "IN" 1575 else: 1576 for_or_in = None 1577 1578 if self._match_text_seq("ACCESS"): 1579 lock_type = "ACCESS" 1580 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1581 lock_type = "EXCLUSIVE" 1582 elif self._match_text_seq("SHARE"): 1583 lock_type = "SHARE" 1584 elif self._match_text_seq("READ"): 1585 lock_type = "READ" 1586 elif self._match_text_seq("WRITE"): 1587 lock_type = "WRITE" 1588 elif self._match_text_seq("CHECKSUM"): 1589 lock_type = "CHECKSUM" 1590 else: 1591 lock_type = None 1592 1593 override = self._match_text_seq("OVERRIDE") 1594 1595 return self.expression( 1596 exp.LockingProperty, 1597 this=this, 1598 kind=kind, 1599 for_or_in=for_or_in, 1600 lock_type=lock_type, 1601 override=override, 1602 ) 1603 1604 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1605 if self._match(TokenType.PARTITION_BY): 1606 return self._parse_csv(self._parse_conjunction) 1607 return [] 1608 1609 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1610 self._match(TokenType.EQ) 1611 return self.expression( 1612 exp.PartitionedByProperty, 1613 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1614 ) 1615 1616 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1617 if self._match_text_seq("AND", "STATISTICS"): 1618 statistics = True 1619 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1620 statistics = False 1621 else: 1622 statistics = None 1623 1624 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1625 1626 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1627 if self._match_text_seq("PRIMARY", "INDEX"): 1628 return exp.NoPrimaryIndexProperty() 1629 return None 1630 1631 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1632 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1633 return exp.OnCommitProperty() 1634 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1635 return exp.OnCommitProperty(delete=True) 1636 return None 1637 1638 def _parse_distkey(self) -> exp.DistKeyProperty: 1639 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1640 1641 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1642 table = self._parse_table(schema=True) 1643 1644 options = [] 1645 while self._match_texts(("INCLUDING", "EXCLUDING")): 1646 this = self._prev.text.upper() 1647 1648 id_var = self._parse_id_var() 1649 if not id_var: 1650 return None 1651 1652 options.append( 1653 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1654 ) 1655 1656 return self.expression(exp.LikeProperty, this=table, expressions=options) 1657 1658 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1659 return self.expression( 1660 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1661 ) 1662 1663 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1664 self._match(TokenType.EQ) 1665 return self.expression( 1666 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1667 ) 1668 1669 def _parse_returns(self) -> exp.ReturnsProperty: 1670 value: t.Optional[exp.Expression] 1671 is_table = self._match(TokenType.TABLE) 1672 1673 if is_table: 1674 if self._match(TokenType.LT): 1675 value = self.expression( 1676 exp.Schema, 1677 this="TABLE", 1678 expressions=self._parse_csv(self._parse_struct_types), 1679 ) 1680 if not self._match(TokenType.GT): 1681 self.raise_error("Expecting >") 1682 else: 1683 value = self._parse_schema(exp.var("TABLE")) 1684 else: 1685 value = self._parse_types() 1686 1687 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1688 1689 def _parse_describe(self) -> exp.Describe: 1690 kind = self._match_set(self.CREATABLES) and self._prev.text 1691 this = self._parse_table() 1692 return self.expression(exp.Describe, this=this, kind=kind) 1693 1694 def _parse_insert(self) -> exp.Insert: 1695 overwrite = self._match(TokenType.OVERWRITE) 1696 ignore = self._match(TokenType.IGNORE) 1697 local = self._match_text_seq("LOCAL") 1698 alternative = None 1699 1700 if self._match_text_seq("DIRECTORY"): 1701 this: t.Optional[exp.Expression] = self.expression( 1702 exp.Directory, 1703 this=self._parse_var_or_string(), 1704 local=local, 1705 row_format=self._parse_row_format(match_row=True), 1706 ) 1707 else: 1708 if self._match(TokenType.OR): 1709 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1710 1711 self._match(TokenType.INTO) 1712 self._match(TokenType.TABLE) 1713 this = self._parse_table(schema=True) 1714 1715 returning = self._parse_returning() 1716 1717 return self.expression( 1718 exp.Insert, 1719 this=this, 1720 exists=self._parse_exists(), 1721 partition=self._parse_partition(), 1722 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1723 and self._parse_conjunction(), 1724 expression=self._parse_ddl_select(), 1725 conflict=self._parse_on_conflict(), 1726 returning=returning or self._parse_returning(), 1727 overwrite=overwrite, 1728 alternative=alternative, 1729 ignore=ignore, 1730 ) 1731 1732 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1733 conflict = self._match_text_seq("ON", "CONFLICT") 1734 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1735 1736 if not conflict and not duplicate: 1737 return None 1738 1739 nothing = None 1740 expressions = None 1741 key = None 1742 constraint = None 1743 1744 if conflict: 1745 if self._match_text_seq("ON", "CONSTRAINT"): 1746 constraint = self._parse_id_var() 1747 else: 1748 key = self._parse_csv(self._parse_value) 1749 1750 self._match_text_seq("DO") 1751 if self._match_text_seq("NOTHING"): 1752 nothing = True 1753 else: 1754 self._match(TokenType.UPDATE) 1755 self._match(TokenType.SET) 1756 expressions = self._parse_csv(self._parse_equality) 1757 1758 return self.expression( 1759 exp.OnConflict, 1760 duplicate=duplicate, 1761 expressions=expressions, 1762 nothing=nothing, 1763 key=key, 1764 constraint=constraint, 1765 ) 1766 1767 def _parse_returning(self) -> t.Optional[exp.Returning]: 1768 if not self._match(TokenType.RETURNING): 1769 return None 1770 return self.expression( 1771 exp.Returning, 1772 expressions=self._parse_csv(self._parse_expression), 1773 into=self._match(TokenType.INTO) and self._parse_table_part(), 1774 ) 1775 1776 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1777 if not self._match(TokenType.FORMAT): 1778 return None 1779 return self._parse_row_format() 1780 1781 def _parse_row_format( 1782 self, match_row: bool = False 1783 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1784 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1785 return None 1786 1787 if self._match_text_seq("SERDE"): 1788 this = self._parse_string() 1789 1790 serde_properties = None 1791 if self._match(TokenType.SERDE_PROPERTIES): 1792 serde_properties = self.expression( 1793 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1794 ) 1795 1796 return self.expression( 1797 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1798 ) 1799 1800 self._match_text_seq("DELIMITED") 1801 1802 kwargs = {} 1803 1804 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1805 kwargs["fields"] = self._parse_string() 1806 if self._match_text_seq("ESCAPED", "BY"): 1807 kwargs["escaped"] = self._parse_string() 1808 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1809 kwargs["collection_items"] = self._parse_string() 1810 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1811 kwargs["map_keys"] = self._parse_string() 1812 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1813 kwargs["lines"] = self._parse_string() 1814 if self._match_text_seq("NULL", "DEFINED", "AS"): 1815 kwargs["null"] = self._parse_string() 1816 1817 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1818 1819 def _parse_load(self) -> exp.LoadData | exp.Command: 1820 if self._match_text_seq("DATA"): 1821 local = self._match_text_seq("LOCAL") 1822 self._match_text_seq("INPATH") 1823 inpath = self._parse_string() 1824 overwrite = self._match(TokenType.OVERWRITE) 1825 self._match_pair(TokenType.INTO, TokenType.TABLE) 1826 1827 return self.expression( 1828 exp.LoadData, 1829 this=self._parse_table(schema=True), 1830 local=local, 1831 overwrite=overwrite, 1832 inpath=inpath, 1833 partition=self._parse_partition(), 1834 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1835 serde=self._match_text_seq("SERDE") and self._parse_string(), 1836 ) 1837 return self._parse_as_command(self._prev) 1838 1839 def _parse_delete(self) -> exp.Delete: 1840 # This handles MySQL's "Multiple-Table Syntax" 1841 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1842 tables = None 1843 if not self._match(TokenType.FROM, advance=False): 1844 tables = self._parse_csv(self._parse_table) or None 1845 1846 returning = self._parse_returning() 1847 1848 return self.expression( 1849 exp.Delete, 1850 tables=tables, 1851 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1852 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1853 where=self._parse_where(), 1854 returning=returning or self._parse_returning(), 1855 limit=self._parse_limit(), 1856 ) 1857 1858 def _parse_update(self) -> exp.Update: 1859 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1860 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1861 returning = self._parse_returning() 1862 return self.expression( 1863 exp.Update, 1864 **{ # type: ignore 1865 "this": this, 1866 "expressions": expressions, 1867 "from": self._parse_from(joins=True), 1868 "where": self._parse_where(), 1869 "returning": returning or self._parse_returning(), 1870 "limit": self._parse_limit(), 1871 }, 1872 ) 1873 1874 def _parse_uncache(self) -> exp.Uncache: 1875 if not self._match(TokenType.TABLE): 1876 self.raise_error("Expecting TABLE after UNCACHE") 1877 1878 return self.expression( 1879 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1880 ) 1881 1882 def _parse_cache(self) -> exp.Cache: 1883 lazy = self._match_text_seq("LAZY") 1884 self._match(TokenType.TABLE) 1885 table = self._parse_table(schema=True) 1886 1887 options = [] 1888 if self._match_text_seq("OPTIONS"): 1889 self._match_l_paren() 1890 k = self._parse_string() 1891 self._match(TokenType.EQ) 1892 v = self._parse_string() 1893 options = [k, v] 1894 self._match_r_paren() 1895 1896 self._match(TokenType.ALIAS) 1897 return self.expression( 1898 exp.Cache, 1899 this=table, 1900 lazy=lazy, 1901 options=options, 1902 expression=self._parse_select(nested=True), 1903 ) 1904 1905 def _parse_partition(self) -> t.Optional[exp.Partition]: 1906 if not self._match(TokenType.PARTITION): 1907 return None 1908 1909 return self.expression( 1910 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1911 ) 1912 1913 def _parse_value(self) -> exp.Tuple: 1914 if self._match(TokenType.L_PAREN): 1915 expressions = self._parse_csv(self._parse_conjunction) 1916 self._match_r_paren() 1917 return self.expression(exp.Tuple, expressions=expressions) 1918 1919 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1920 # https://prestodb.io/docs/current/sql/values.html 1921 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1922 1923 def _parse_select( 1924 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1925 ) -> t.Optional[exp.Expression]: 1926 cte = self._parse_with() 1927 if cte: 1928 this = self._parse_statement() 1929 1930 if not this: 1931 self.raise_error("Failed to parse any statement following CTE") 1932 return cte 1933 1934 if "with" in this.arg_types: 1935 this.set("with", cte) 1936 else: 1937 self.raise_error(f"{this.key} does not support CTE") 1938 this = cte 1939 elif self._match(TokenType.SELECT): 1940 comments = self._prev_comments 1941 1942 hint = self._parse_hint() 1943 all_ = self._match(TokenType.ALL) 1944 distinct = self._match(TokenType.DISTINCT) 1945 1946 kind = ( 1947 self._match(TokenType.ALIAS) 1948 and self._match_texts(("STRUCT", "VALUE")) 1949 and self._prev.text 1950 ) 1951 1952 if distinct: 1953 distinct = self.expression( 1954 exp.Distinct, 1955 on=self._parse_value() if self._match(TokenType.ON) else None, 1956 ) 1957 1958 if all_ and distinct: 1959 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1960 1961 limit = self._parse_limit(top=True) 1962 expressions = self._parse_expressions() 1963 1964 this = self.expression( 1965 exp.Select, 1966 kind=kind, 1967 hint=hint, 1968 distinct=distinct, 1969 expressions=expressions, 1970 limit=limit, 1971 ) 1972 this.comments = comments 1973 1974 into = self._parse_into() 1975 if into: 1976 this.set("into", into) 1977 1978 from_ = self._parse_from() 1979 if from_: 1980 this.set("from", from_) 1981 1982 this = self._parse_query_modifiers(this) 1983 elif (table or nested) and self._match(TokenType.L_PAREN): 1984 if self._match(TokenType.PIVOT): 1985 this = self._parse_simplified_pivot() 1986 elif self._match(TokenType.FROM): 1987 this = exp.select("*").from_( 1988 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1989 ) 1990 else: 1991 this = self._parse_table() if table else self._parse_select(nested=True) 1992 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1993 1994 self._match_r_paren() 1995 1996 # We return early here so that the UNION isn't attached to the subquery by the 1997 # following call to _parse_set_operations, but instead becomes the parent node 1998 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1999 elif self._match(TokenType.VALUES): 2000 this = self.expression( 2001 exp.Values, 2002 expressions=self._parse_csv(self._parse_value), 2003 alias=self._parse_table_alias(), 2004 ) 2005 else: 2006 this = None 2007 2008 return self._parse_set_operations(this) 2009 2010 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2011 if not skip_with_token and not self._match(TokenType.WITH): 2012 return None 2013 2014 comments = self._prev_comments 2015 recursive = self._match(TokenType.RECURSIVE) 2016 2017 expressions = [] 2018 while True: 2019 expressions.append(self._parse_cte()) 2020 2021 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2022 break 2023 else: 2024 self._match(TokenType.WITH) 2025 2026 return self.expression( 2027 exp.With, comments=comments, expressions=expressions, recursive=recursive 2028 ) 2029 2030 def _parse_cte(self) -> exp.CTE: 2031 alias = self._parse_table_alias() 2032 if not alias or not alias.this: 2033 self.raise_error("Expected CTE to have alias") 2034 2035 self._match(TokenType.ALIAS) 2036 return self.expression( 2037 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2038 ) 2039 2040 def _parse_table_alias( 2041 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2042 ) -> t.Optional[exp.TableAlias]: 2043 any_token = self._match(TokenType.ALIAS) 2044 alias = ( 2045 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2046 or self._parse_string_as_identifier() 2047 ) 2048 2049 index = self._index 2050 if self._match(TokenType.L_PAREN): 2051 columns = self._parse_csv(self._parse_function_parameter) 2052 self._match_r_paren() if columns else self._retreat(index) 2053 else: 2054 columns = None 2055 2056 if not alias and not columns: 2057 return None 2058 2059 return self.expression(exp.TableAlias, this=alias, columns=columns) 2060 2061 def _parse_subquery( 2062 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2063 ) -> t.Optional[exp.Subquery]: 2064 if not this: 2065 return None 2066 2067 return self.expression( 2068 exp.Subquery, 2069 this=this, 2070 pivots=self._parse_pivots(), 2071 alias=self._parse_table_alias() if parse_alias else None, 2072 ) 2073 2074 def _parse_query_modifiers( 2075 self, this: t.Optional[exp.Expression] 2076 ) -> t.Optional[exp.Expression]: 2077 if isinstance(this, self.MODIFIABLES): 2078 for join in iter(self._parse_join, None): 2079 this.append("joins", join) 2080 for lateral in iter(self._parse_lateral, None): 2081 this.append("laterals", lateral) 2082 2083 while True: 2084 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2085 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2086 key, expression = parser(self) 2087 2088 if expression: 2089 this.set(key, expression) 2090 if key == "limit": 2091 offset = expression.args.pop("offset", None) 2092 if offset: 2093 this.set("offset", exp.Offset(expression=offset)) 2094 continue 2095 break 2096 return this 2097 2098 def _parse_hint(self) -> t.Optional[exp.Hint]: 2099 if self._match(TokenType.HINT): 2100 hints = [] 2101 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2102 hints.extend(hint) 2103 2104 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2105 self.raise_error("Expected */ after HINT") 2106 2107 return self.expression(exp.Hint, expressions=hints) 2108 2109 return None 2110 2111 def _parse_into(self) -> t.Optional[exp.Into]: 2112 if not self._match(TokenType.INTO): 2113 return None 2114 2115 temp = self._match(TokenType.TEMPORARY) 2116 unlogged = self._match_text_seq("UNLOGGED") 2117 self._match(TokenType.TABLE) 2118 2119 return self.expression( 2120 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2121 ) 2122 2123 def _parse_from( 2124 self, joins: bool = False, skip_from_token: bool = False 2125 ) -> t.Optional[exp.From]: 2126 if not skip_from_token and not self._match(TokenType.FROM): 2127 return None 2128 2129 return self.expression( 2130 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2131 ) 2132 2133 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2134 if not self._match(TokenType.MATCH_RECOGNIZE): 2135 return None 2136 2137 self._match_l_paren() 2138 2139 partition = self._parse_partition_by() 2140 order = self._parse_order() 2141 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2142 2143 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2144 rows = exp.var("ONE ROW PER MATCH") 2145 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2146 text = "ALL ROWS PER MATCH" 2147 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2148 text += f" SHOW EMPTY MATCHES" 2149 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2150 text += f" OMIT EMPTY MATCHES" 2151 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2152 text += f" WITH UNMATCHED ROWS" 2153 rows = exp.var(text) 2154 else: 2155 rows = None 2156 2157 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2158 text = "AFTER MATCH SKIP" 2159 if self._match_text_seq("PAST", "LAST", "ROW"): 2160 text += f" PAST LAST ROW" 2161 elif self._match_text_seq("TO", "NEXT", "ROW"): 2162 text += f" TO NEXT ROW" 2163 elif self._match_text_seq("TO", "FIRST"): 2164 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2165 elif self._match_text_seq("TO", "LAST"): 2166 text += f" TO LAST {self._advance_any().text}" # type: ignore 2167 after = exp.var(text) 2168 else: 2169 after = None 2170 2171 if self._match_text_seq("PATTERN"): 2172 self._match_l_paren() 2173 2174 if not self._curr: 2175 self.raise_error("Expecting )", self._curr) 2176 2177 paren = 1 2178 start = self._curr 2179 2180 while self._curr and paren > 0: 2181 if self._curr.token_type == TokenType.L_PAREN: 2182 paren += 1 2183 if self._curr.token_type == TokenType.R_PAREN: 2184 paren -= 1 2185 2186 end = self._prev 2187 self._advance() 2188 2189 if paren > 0: 2190 self.raise_error("Expecting )", self._curr) 2191 2192 pattern = exp.var(self._find_sql(start, end)) 2193 else: 2194 pattern = None 2195 2196 define = ( 2197 self._parse_csv( 2198 lambda: self.expression( 2199 exp.Alias, 2200 alias=self._parse_id_var(any_token=True), 2201 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2202 ) 2203 ) 2204 if self._match_text_seq("DEFINE") 2205 else None 2206 ) 2207 2208 self._match_r_paren() 2209 2210 return self.expression( 2211 exp.MatchRecognize, 2212 partition_by=partition, 2213 order=order, 2214 measures=measures, 2215 rows=rows, 2216 after=after, 2217 pattern=pattern, 2218 define=define, 2219 alias=self._parse_table_alias(), 2220 ) 2221 2222 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2223 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2224 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2225 2226 if outer_apply or cross_apply: 2227 this = self._parse_select(table=True) 2228 view = None 2229 outer = not cross_apply 2230 elif self._match(TokenType.LATERAL): 2231 this = self._parse_select(table=True) 2232 view = self._match(TokenType.VIEW) 2233 outer = self._match(TokenType.OUTER) 2234 else: 2235 return None 2236 2237 if not this: 2238 this = self._parse_function() or self._parse_id_var(any_token=False) 2239 while self._match(TokenType.DOT): 2240 this = exp.Dot( 2241 this=this, 2242 expression=self._parse_function() or self._parse_id_var(any_token=False), 2243 ) 2244 2245 if view: 2246 table = self._parse_id_var(any_token=False) 2247 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2248 table_alias: t.Optional[exp.TableAlias] = self.expression( 2249 exp.TableAlias, this=table, columns=columns 2250 ) 2251 elif isinstance(this, exp.Subquery) and this.alias: 2252 # Ensures parity between the Subquery's and the Lateral's "alias" args 2253 table_alias = this.args["alias"].copy() 2254 else: 2255 table_alias = self._parse_table_alias() 2256 2257 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2258 2259 def _parse_join_parts( 2260 self, 2261 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2262 return ( 2263 self._match_set(self.JOIN_METHODS) and self._prev, 2264 self._match_set(self.JOIN_SIDES) and self._prev, 2265 self._match_set(self.JOIN_KINDS) and self._prev, 2266 ) 2267 2268 def _parse_join( 2269 self, skip_join_token: bool = False, parse_bracket: bool = False 2270 ) -> t.Optional[exp.Join]: 2271 if self._match(TokenType.COMMA): 2272 return self.expression(exp.Join, this=self._parse_table()) 2273 2274 index = self._index 2275 method, side, kind = self._parse_join_parts() 2276 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2277 join = self._match(TokenType.JOIN) 2278 2279 if not skip_join_token and not join: 2280 self._retreat(index) 2281 kind = None 2282 method = None 2283 side = None 2284 2285 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2286 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2287 2288 if not skip_join_token and not join and not outer_apply and not cross_apply: 2289 return None 2290 2291 if outer_apply: 2292 side = Token(TokenType.LEFT, "LEFT") 2293 2294 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2295 2296 if method: 2297 kwargs["method"] = method.text 2298 if side: 2299 kwargs["side"] = side.text 2300 if kind: 2301 kwargs["kind"] = kind.text 2302 if hint: 2303 kwargs["hint"] = hint 2304 2305 if self._match(TokenType.ON): 2306 kwargs["on"] = self._parse_conjunction() 2307 elif self._match(TokenType.USING): 2308 kwargs["using"] = self._parse_wrapped_id_vars() 2309 elif not (kind and kind.token_type == TokenType.CROSS): 2310 index = self._index 2311 joins = self._parse_joins() 2312 2313 if joins and self._match(TokenType.ON): 2314 kwargs["on"] = self._parse_conjunction() 2315 elif joins and self._match(TokenType.USING): 2316 kwargs["using"] = self._parse_wrapped_id_vars() 2317 else: 2318 joins = None 2319 self._retreat(index) 2320 2321 kwargs["this"].set("joins", joins) 2322 2323 return self.expression(exp.Join, **kwargs) 2324 2325 def _parse_index( 2326 self, 2327 index: t.Optional[exp.Expression] = None, 2328 ) -> t.Optional[exp.Index]: 2329 if index: 2330 unique = None 2331 primary = None 2332 amp = None 2333 2334 self._match(TokenType.ON) 2335 self._match(TokenType.TABLE) # hive 2336 table = self._parse_table_parts(schema=True) 2337 else: 2338 unique = self._match(TokenType.UNIQUE) 2339 primary = self._match_text_seq("PRIMARY") 2340 amp = self._match_text_seq("AMP") 2341 2342 if not self._match(TokenType.INDEX): 2343 return None 2344 2345 index = self._parse_id_var() 2346 table = None 2347 2348 using = self._parse_field() if self._match(TokenType.USING) else None 2349 2350 if self._match(TokenType.L_PAREN, advance=False): 2351 columns = self._parse_wrapped_csv(self._parse_ordered) 2352 else: 2353 columns = None 2354 2355 return self.expression( 2356 exp.Index, 2357 this=index, 2358 table=table, 2359 using=using, 2360 columns=columns, 2361 unique=unique, 2362 primary=primary, 2363 amp=amp, 2364 partition_by=self._parse_partition_by(), 2365 ) 2366 2367 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2368 hints: t.List[exp.Expression] = [] 2369 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2370 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2371 hints.append( 2372 self.expression( 2373 exp.WithTableHint, 2374 expressions=self._parse_csv( 2375 lambda: self._parse_function() or self._parse_var(any_token=True) 2376 ), 2377 ) 2378 ) 2379 self._match_r_paren() 2380 else: 2381 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2382 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2383 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2384 2385 self._match_texts({"INDEX", "KEY"}) 2386 if self._match(TokenType.FOR): 2387 hint.set("target", self._advance_any() and self._prev.text.upper()) 2388 2389 hint.set("expressions", self._parse_wrapped_id_vars()) 2390 hints.append(hint) 2391 2392 return hints or None 2393 2394 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2395 return ( 2396 (not schema and self._parse_function(optional_parens=False)) 2397 or self._parse_id_var(any_token=False) 2398 or self._parse_string_as_identifier() 2399 or self._parse_placeholder() 2400 ) 2401 2402 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2403 catalog = None 2404 db = None 2405 table = self._parse_table_part(schema=schema) 2406 2407 while self._match(TokenType.DOT): 2408 if catalog: 2409 # This allows nesting the table in arbitrarily many dot expressions if needed 2410 table = self.expression( 2411 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2412 ) 2413 else: 2414 catalog = db 2415 db = table 2416 table = self._parse_table_part(schema=schema) 2417 2418 if not table: 2419 self.raise_error(f"Expected table name but got {self._curr}") 2420 2421 return self.expression( 2422 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2423 ) 2424 2425 def _parse_table( 2426 self, 2427 schema: bool = False, 2428 joins: bool = False, 2429 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2430 parse_bracket: bool = False, 2431 ) -> t.Optional[exp.Expression]: 2432 lateral = self._parse_lateral() 2433 if lateral: 2434 return lateral 2435 2436 unnest = self._parse_unnest() 2437 if unnest: 2438 return unnest 2439 2440 values = self._parse_derived_table_values() 2441 if values: 2442 return values 2443 2444 subquery = self._parse_select(table=True) 2445 if subquery: 2446 if not subquery.args.get("pivots"): 2447 subquery.set("pivots", self._parse_pivots()) 2448 return subquery 2449 2450 bracket = parse_bracket and self._parse_bracket(None) 2451 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2452 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2453 2454 if schema: 2455 return self._parse_schema(this=this) 2456 2457 if self.ALIAS_POST_TABLESAMPLE: 2458 table_sample = self._parse_table_sample() 2459 2460 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2461 if alias: 2462 this.set("alias", alias) 2463 2464 if not this.args.get("pivots"): 2465 this.set("pivots", self._parse_pivots()) 2466 2467 this.set("hints", self._parse_table_hints()) 2468 2469 if not self.ALIAS_POST_TABLESAMPLE: 2470 table_sample = self._parse_table_sample() 2471 2472 if table_sample: 2473 table_sample.set("this", this) 2474 this = table_sample 2475 2476 if joins: 2477 for join in iter(self._parse_join, None): 2478 this.append("joins", join) 2479 2480 return this 2481 2482 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2483 if not self._match(TokenType.UNNEST): 2484 return None 2485 2486 expressions = self._parse_wrapped_csv(self._parse_type) 2487 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2488 2489 alias = self._parse_table_alias() if with_alias else None 2490 2491 if alias and self.UNNEST_COLUMN_ONLY: 2492 if alias.args.get("columns"): 2493 self.raise_error("Unexpected extra column alias in unnest.") 2494 2495 alias.set("columns", [alias.this]) 2496 alias.set("this", None) 2497 2498 offset = None 2499 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2500 self._match(TokenType.ALIAS) 2501 offset = self._parse_id_var() or exp.to_identifier("offset") 2502 2503 return self.expression( 2504 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2505 ) 2506 2507 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2508 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2509 if not is_derived and not self._match(TokenType.VALUES): 2510 return None 2511 2512 expressions = self._parse_csv(self._parse_value) 2513 alias = self._parse_table_alias() 2514 2515 if is_derived: 2516 self._match_r_paren() 2517 2518 return self.expression( 2519 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2520 ) 2521 2522 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2523 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2524 as_modifier and self._match_text_seq("USING", "SAMPLE") 2525 ): 2526 return None 2527 2528 bucket_numerator = None 2529 bucket_denominator = None 2530 bucket_field = None 2531 percent = None 2532 rows = None 2533 size = None 2534 seed = None 2535 2536 kind = ( 2537 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2538 ) 2539 method = self._parse_var(tokens=(TokenType.ROW,)) 2540 2541 self._match(TokenType.L_PAREN) 2542 2543 num = self._parse_number() 2544 2545 if self._match_text_seq("BUCKET"): 2546 bucket_numerator = self._parse_number() 2547 self._match_text_seq("OUT", "OF") 2548 bucket_denominator = bucket_denominator = self._parse_number() 2549 self._match(TokenType.ON) 2550 bucket_field = self._parse_field() 2551 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2552 percent = num 2553 elif self._match(TokenType.ROWS): 2554 rows = num 2555 else: 2556 size = num 2557 2558 self._match(TokenType.R_PAREN) 2559 2560 if self._match(TokenType.L_PAREN): 2561 method = self._parse_var() 2562 seed = self._match(TokenType.COMMA) and self._parse_number() 2563 self._match_r_paren() 2564 elif self._match_texts(("SEED", "REPEATABLE")): 2565 seed = self._parse_wrapped(self._parse_number) 2566 2567 return self.expression( 2568 exp.TableSample, 2569 method=method, 2570 bucket_numerator=bucket_numerator, 2571 bucket_denominator=bucket_denominator, 2572 bucket_field=bucket_field, 2573 percent=percent, 2574 rows=rows, 2575 size=size, 2576 seed=seed, 2577 kind=kind, 2578 ) 2579 2580 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2581 return list(iter(self._parse_pivot, None)) or None 2582 2583 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2584 return list(iter(self._parse_join, None)) or None 2585 2586 # https://duckdb.org/docs/sql/statements/pivot 2587 def _parse_simplified_pivot(self) -> exp.Pivot: 2588 def _parse_on() -> t.Optional[exp.Expression]: 2589 this = self._parse_bitwise() 2590 return self._parse_in(this) if self._match(TokenType.IN) else this 2591 2592 this = self._parse_table() 2593 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2594 using = self._match(TokenType.USING) and self._parse_csv( 2595 lambda: self._parse_alias(self._parse_function()) 2596 ) 2597 group = self._parse_group() 2598 return self.expression( 2599 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2600 ) 2601 2602 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2603 index = self._index 2604 2605 if self._match(TokenType.PIVOT): 2606 unpivot = False 2607 elif self._match(TokenType.UNPIVOT): 2608 unpivot = True 2609 else: 2610 return None 2611 2612 expressions = [] 2613 field = None 2614 2615 if not self._match(TokenType.L_PAREN): 2616 self._retreat(index) 2617 return None 2618 2619 if unpivot: 2620 expressions = self._parse_csv(self._parse_column) 2621 else: 2622 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2623 2624 if not expressions: 2625 self.raise_error("Failed to parse PIVOT's aggregation list") 2626 2627 if not self._match(TokenType.FOR): 2628 self.raise_error("Expecting FOR") 2629 2630 value = self._parse_column() 2631 2632 if not self._match(TokenType.IN): 2633 self.raise_error("Expecting IN") 2634 2635 field = self._parse_in(value, alias=True) 2636 2637 self._match_r_paren() 2638 2639 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2640 2641 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2642 pivot.set("alias", self._parse_table_alias()) 2643 2644 if not unpivot: 2645 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2646 2647 columns: t.List[exp.Expression] = [] 2648 for fld in pivot.args["field"].expressions: 2649 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2650 for name in names: 2651 if self.PREFIXED_PIVOT_COLUMNS: 2652 name = f"{name}_{field_name}" if name else field_name 2653 else: 2654 name = f"{field_name}_{name}" if name else field_name 2655 2656 columns.append(exp.to_identifier(name)) 2657 2658 pivot.set("columns", columns) 2659 2660 return pivot 2661 2662 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2663 return [agg.alias for agg in aggregations] 2664 2665 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2666 if not skip_where_token and not self._match(TokenType.WHERE): 2667 return None 2668 2669 return self.expression( 2670 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2671 ) 2672 2673 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2674 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2675 return None 2676 2677 elements = defaultdict(list) 2678 2679 if self._match(TokenType.ALL): 2680 return self.expression(exp.Group, all=True) 2681 2682 while True: 2683 expressions = self._parse_csv(self._parse_conjunction) 2684 if expressions: 2685 elements["expressions"].extend(expressions) 2686 2687 grouping_sets = self._parse_grouping_sets() 2688 if grouping_sets: 2689 elements["grouping_sets"].extend(grouping_sets) 2690 2691 rollup = None 2692 cube = None 2693 totals = None 2694 2695 with_ = self._match(TokenType.WITH) 2696 if self._match(TokenType.ROLLUP): 2697 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2698 elements["rollup"].extend(ensure_list(rollup)) 2699 2700 if self._match(TokenType.CUBE): 2701 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2702 elements["cube"].extend(ensure_list(cube)) 2703 2704 if self._match_text_seq("TOTALS"): 2705 totals = True 2706 elements["totals"] = True # type: ignore 2707 2708 if not (grouping_sets or rollup or cube or totals): 2709 break 2710 2711 return self.expression(exp.Group, **elements) # type: ignore 2712 2713 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2714 if not self._match(TokenType.GROUPING_SETS): 2715 return None 2716 2717 return self._parse_wrapped_csv(self._parse_grouping_set) 2718 2719 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2720 if self._match(TokenType.L_PAREN): 2721 grouping_set = self._parse_csv(self._parse_column) 2722 self._match_r_paren() 2723 return self.expression(exp.Tuple, expressions=grouping_set) 2724 2725 return self._parse_column() 2726 2727 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2728 if not skip_having_token and not self._match(TokenType.HAVING): 2729 return None 2730 return self.expression(exp.Having, this=self._parse_conjunction()) 2731 2732 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2733 if not self._match(TokenType.QUALIFY): 2734 return None 2735 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2736 2737 def _parse_order( 2738 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2739 ) -> t.Optional[exp.Expression]: 2740 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2741 return this 2742 2743 return self.expression( 2744 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2745 ) 2746 2747 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2748 if not self._match(token): 2749 return None 2750 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2751 2752 def _parse_ordered(self) -> exp.Ordered: 2753 this = self._parse_conjunction() 2754 self._match(TokenType.ASC) 2755 2756 is_desc = self._match(TokenType.DESC) 2757 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2758 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2759 desc = is_desc or False 2760 asc = not desc 2761 nulls_first = is_nulls_first or False 2762 explicitly_null_ordered = is_nulls_first or is_nulls_last 2763 2764 if ( 2765 not explicitly_null_ordered 2766 and ( 2767 (asc and self.NULL_ORDERING == "nulls_are_small") 2768 or (desc and self.NULL_ORDERING != "nulls_are_small") 2769 ) 2770 and self.NULL_ORDERING != "nulls_are_last" 2771 ): 2772 nulls_first = True 2773 2774 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2775 2776 def _parse_limit( 2777 self, this: t.Optional[exp.Expression] = None, top: bool = False 2778 ) -> t.Optional[exp.Expression]: 2779 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2780 comments = self._prev_comments 2781 limit_paren = self._match(TokenType.L_PAREN) 2782 expression = self._parse_number() if top else self._parse_term() 2783 2784 if self._match(TokenType.COMMA): 2785 offset = expression 2786 expression = self._parse_term() 2787 else: 2788 offset = None 2789 2790 limit_exp = self.expression( 2791 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2792 ) 2793 2794 if limit_paren: 2795 self._match_r_paren() 2796 2797 return limit_exp 2798 2799 if self._match(TokenType.FETCH): 2800 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2801 direction = self._prev.text if direction else "FIRST" 2802 2803 count = self._parse_number() 2804 percent = self._match(TokenType.PERCENT) 2805 2806 self._match_set((TokenType.ROW, TokenType.ROWS)) 2807 2808 only = self._match_text_seq("ONLY") 2809 with_ties = self._match_text_seq("WITH", "TIES") 2810 2811 if only and with_ties: 2812 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2813 2814 return self.expression( 2815 exp.Fetch, 2816 direction=direction, 2817 count=count, 2818 percent=percent, 2819 with_ties=with_ties, 2820 ) 2821 2822 return this 2823 2824 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2825 if not self._match(TokenType.OFFSET): 2826 return this 2827 2828 count = self._parse_number() 2829 self._match_set((TokenType.ROW, TokenType.ROWS)) 2830 return self.expression(exp.Offset, this=this, expression=count) 2831 2832 def _parse_locks(self) -> t.List[exp.Lock]: 2833 locks = [] 2834 while True: 2835 if self._match_text_seq("FOR", "UPDATE"): 2836 update = True 2837 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2838 "LOCK", "IN", "SHARE", "MODE" 2839 ): 2840 update = False 2841 else: 2842 break 2843 2844 expressions = None 2845 if self._match_text_seq("OF"): 2846 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2847 2848 wait: t.Optional[bool | exp.Expression] = None 2849 if self._match_text_seq("NOWAIT"): 2850 wait = True 2851 elif self._match_text_seq("WAIT"): 2852 wait = self._parse_primary() 2853 elif self._match_text_seq("SKIP", "LOCKED"): 2854 wait = False 2855 2856 locks.append( 2857 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2858 ) 2859 2860 return locks 2861 2862 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2863 if not self._match_set(self.SET_OPERATIONS): 2864 return this 2865 2866 token_type = self._prev.token_type 2867 2868 if token_type == TokenType.UNION: 2869 expression = exp.Union 2870 elif token_type == TokenType.EXCEPT: 2871 expression = exp.Except 2872 else: 2873 expression = exp.Intersect 2874 2875 return self.expression( 2876 expression, 2877 this=this, 2878 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2879 expression=self._parse_set_operations(self._parse_select(nested=True)), 2880 ) 2881 2882 def _parse_expression(self) -> t.Optional[exp.Expression]: 2883 return self._parse_alias(self._parse_conjunction()) 2884 2885 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2886 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2887 2888 def _parse_equality(self) -> t.Optional[exp.Expression]: 2889 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2890 2891 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2892 return self._parse_tokens(self._parse_range, self.COMPARISON) 2893 2894 def _parse_range(self) -> t.Optional[exp.Expression]: 2895 this = self._parse_bitwise() 2896 negate = self._match(TokenType.NOT) 2897 2898 if self._match_set(self.RANGE_PARSERS): 2899 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2900 if not expression: 2901 return this 2902 2903 this = expression 2904 elif self._match(TokenType.ISNULL): 2905 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2906 2907 # Postgres supports ISNULL and NOTNULL for conditions. 2908 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2909 if self._match(TokenType.NOTNULL): 2910 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2911 this = self.expression(exp.Not, this=this) 2912 2913 if negate: 2914 this = self.expression(exp.Not, this=this) 2915 2916 if self._match(TokenType.IS): 2917 this = self._parse_is(this) 2918 2919 return this 2920 2921 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2922 index = self._index - 1 2923 negate = self._match(TokenType.NOT) 2924 2925 if self._match_text_seq("DISTINCT", "FROM"): 2926 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2927 return self.expression(klass, this=this, expression=self._parse_expression()) 2928 2929 expression = self._parse_null() or self._parse_boolean() 2930 if not expression: 2931 self._retreat(index) 2932 return None 2933 2934 this = self.expression(exp.Is, this=this, expression=expression) 2935 return self.expression(exp.Not, this=this) if negate else this 2936 2937 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2938 unnest = self._parse_unnest(with_alias=False) 2939 if unnest: 2940 this = self.expression(exp.In, this=this, unnest=unnest) 2941 elif self._match(TokenType.L_PAREN): 2942 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2943 2944 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2945 this = self.expression(exp.In, this=this, query=expressions[0]) 2946 else: 2947 this = self.expression(exp.In, this=this, expressions=expressions) 2948 2949 self._match_r_paren(this) 2950 else: 2951 this = self.expression(exp.In, this=this, field=self._parse_field()) 2952 2953 return this 2954 2955 def _parse_between(self, this: exp.Expression) -> exp.Between: 2956 low = self._parse_bitwise() 2957 self._match(TokenType.AND) 2958 high = self._parse_bitwise() 2959 return self.expression(exp.Between, this=this, low=low, high=high) 2960 2961 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2962 if not self._match(TokenType.ESCAPE): 2963 return this 2964 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2965 2966 def _parse_interval(self) -> t.Optional[exp.Interval]: 2967 if not self._match(TokenType.INTERVAL): 2968 return None 2969 2970 if self._match(TokenType.STRING, advance=False): 2971 this = self._parse_primary() 2972 else: 2973 this = self._parse_term() 2974 2975 unit = self._parse_function() or self._parse_var() 2976 2977 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2978 # each INTERVAL expression into this canonical form so it's easy to transpile 2979 if this and this.is_number: 2980 this = exp.Literal.string(this.name) 2981 elif this and this.is_string: 2982 parts = this.name.split() 2983 2984 if len(parts) == 2: 2985 if unit: 2986 # this is not actually a unit, it's something else 2987 unit = None 2988 self._retreat(self._index - 1) 2989 else: 2990 this = exp.Literal.string(parts[0]) 2991 unit = self.expression(exp.Var, this=parts[1]) 2992 2993 return self.expression(exp.Interval, this=this, unit=unit) 2994 2995 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2996 this = self._parse_term() 2997 2998 while True: 2999 if self._match_set(self.BITWISE): 3000 this = self.expression( 3001 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 3002 ) 3003 elif self._match_pair(TokenType.LT, TokenType.LT): 3004 this = self.expression( 3005 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3006 ) 3007 elif self._match_pair(TokenType.GT, TokenType.GT): 3008 this = self.expression( 3009 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3010 ) 3011 else: 3012 break 3013 3014 return this 3015 3016 def _parse_term(self) -> t.Optional[exp.Expression]: 3017 return self._parse_tokens(self._parse_factor, self.TERM) 3018 3019 def _parse_factor(self) -> t.Optional[exp.Expression]: 3020 return self._parse_tokens(self._parse_unary, self.FACTOR) 3021 3022 def _parse_unary(self) -> t.Optional[exp.Expression]: 3023 if self._match_set(self.UNARY_PARSERS): 3024 return self.UNARY_PARSERS[self._prev.token_type](self) 3025 return self._parse_at_time_zone(self._parse_type()) 3026 3027 def _parse_type(self) -> t.Optional[exp.Expression]: 3028 interval = self._parse_interval() 3029 if interval: 3030 return interval 3031 3032 index = self._index 3033 data_type = self._parse_types(check_func=True) 3034 this = self._parse_column() 3035 3036 if data_type: 3037 if isinstance(this, exp.Literal): 3038 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3039 if parser: 3040 return parser(self, this, data_type) 3041 return self.expression(exp.Cast, this=this, to=data_type) 3042 if not data_type.expressions: 3043 self._retreat(index) 3044 return self._parse_column() 3045 return self._parse_column_ops(data_type) 3046 3047 return this 3048 3049 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3050 this = self._parse_type() 3051 if not this: 3052 return None 3053 3054 return self.expression( 3055 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3056 ) 3057 3058 def _parse_types( 3059 self, check_func: bool = False, schema: bool = False 3060 ) -> t.Optional[exp.Expression]: 3061 index = self._index 3062 3063 prefix = self._match_text_seq("SYSUDTLIB", ".") 3064 3065 if not self._match_set(self.TYPE_TOKENS): 3066 return None 3067 3068 type_token = self._prev.token_type 3069 3070 if type_token == TokenType.PSEUDO_TYPE: 3071 return self.expression(exp.PseudoType, this=self._prev.text) 3072 3073 nested = type_token in self.NESTED_TYPE_TOKENS 3074 is_struct = type_token == TokenType.STRUCT 3075 expressions = None 3076 maybe_func = False 3077 3078 if self._match(TokenType.L_PAREN): 3079 if is_struct: 3080 expressions = self._parse_csv(self._parse_struct_types) 3081 elif nested: 3082 expressions = self._parse_csv( 3083 lambda: self._parse_types(check_func=check_func, schema=schema) 3084 ) 3085 elif type_token in self.ENUM_TYPE_TOKENS: 3086 expressions = self._parse_csv(self._parse_primary) 3087 else: 3088 expressions = self._parse_csv(self._parse_type_size) 3089 3090 if not expressions or not self._match(TokenType.R_PAREN): 3091 self._retreat(index) 3092 return None 3093 3094 maybe_func = True 3095 3096 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3097 this = exp.DataType( 3098 this=exp.DataType.Type.ARRAY, 3099 expressions=[ 3100 exp.DataType( 3101 this=exp.DataType.Type[type_token.value], 3102 expressions=expressions, 3103 nested=nested, 3104 ) 3105 ], 3106 nested=True, 3107 ) 3108 3109 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3110 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3111 3112 return this 3113 3114 if self._match(TokenType.L_BRACKET): 3115 self._retreat(index) 3116 return None 3117 3118 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3119 if nested and self._match(TokenType.LT): 3120 if is_struct: 3121 expressions = self._parse_csv(self._parse_struct_types) 3122 else: 3123 expressions = self._parse_csv( 3124 lambda: self._parse_types(check_func=check_func, schema=schema) 3125 ) 3126 3127 if not self._match(TokenType.GT): 3128 self.raise_error("Expecting >") 3129 3130 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3131 values = self._parse_csv(self._parse_conjunction) 3132 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3133 3134 value: t.Optional[exp.Expression] = None 3135 if type_token in self.TIMESTAMPS: 3136 if self._match_text_seq("WITH", "TIME", "ZONE"): 3137 maybe_func = False 3138 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3139 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3140 maybe_func = False 3141 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3142 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3143 maybe_func = False 3144 elif type_token == TokenType.INTERVAL: 3145 unit = self._parse_var() 3146 3147 if not unit: 3148 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3149 else: 3150 value = self.expression(exp.Interval, unit=unit) 3151 3152 if maybe_func and check_func: 3153 index2 = self._index 3154 peek = self._parse_string() 3155 3156 if not peek: 3157 self._retreat(index) 3158 return None 3159 3160 self._retreat(index2) 3161 3162 if value: 3163 return value 3164 3165 return exp.DataType( 3166 this=exp.DataType.Type[type_token.value], 3167 expressions=expressions, 3168 nested=nested, 3169 values=values, 3170 prefix=prefix, 3171 ) 3172 3173 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3174 this = self._parse_type() or self._parse_id_var() 3175 self._match(TokenType.COLON) 3176 return self._parse_column_def(this) 3177 3178 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3179 if not self._match_text_seq("AT", "TIME", "ZONE"): 3180 return this 3181 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3182 3183 def _parse_column(self) -> t.Optional[exp.Expression]: 3184 this = self._parse_field() 3185 if isinstance(this, exp.Identifier): 3186 this = self.expression(exp.Column, this=this) 3187 elif not this: 3188 return self._parse_bracket(this) 3189 return self._parse_column_ops(this) 3190 3191 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3192 this = self._parse_bracket(this) 3193 3194 while self._match_set(self.COLUMN_OPERATORS): 3195 op_token = self._prev.token_type 3196 op = self.COLUMN_OPERATORS.get(op_token) 3197 3198 if op_token == TokenType.DCOLON: 3199 field = self._parse_types() 3200 if not field: 3201 self.raise_error("Expected type") 3202 elif op and self._curr: 3203 self._advance() 3204 value = self._prev.text 3205 field = ( 3206 exp.Literal.number(value) 3207 if self._prev.token_type == TokenType.NUMBER 3208 else exp.Literal.string(value) 3209 ) 3210 else: 3211 field = self._parse_field(anonymous_func=True, any_token=True) 3212 3213 if isinstance(field, exp.Func): 3214 # bigquery allows function calls like x.y.count(...) 3215 # SAFE.SUBSTR(...) 3216 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3217 this = self._replace_columns_with_dots(this) 3218 3219 if op: 3220 this = op(self, this, field) 3221 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3222 this = self.expression( 3223 exp.Column, 3224 this=field, 3225 table=this.this, 3226 db=this.args.get("table"), 3227 catalog=this.args.get("db"), 3228 ) 3229 else: 3230 this = self.expression(exp.Dot, this=this, expression=field) 3231 this = self._parse_bracket(this) 3232 return this 3233 3234 def _parse_primary(self) -> t.Optional[exp.Expression]: 3235 if self._match_set(self.PRIMARY_PARSERS): 3236 token_type = self._prev.token_type 3237 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3238 3239 if token_type == TokenType.STRING: 3240 expressions = [primary] 3241 while self._match(TokenType.STRING): 3242 expressions.append(exp.Literal.string(self._prev.text)) 3243 3244 if len(expressions) > 1: 3245 return self.expression(exp.Concat, expressions=expressions) 3246 3247 return primary 3248 3249 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3250 return exp.Literal.number(f"0.{self._prev.text}") 3251 3252 if self._match(TokenType.L_PAREN): 3253 comments = self._prev_comments 3254 query = self._parse_select() 3255 3256 if query: 3257 expressions = [query] 3258 else: 3259 expressions = self._parse_expressions() 3260 3261 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3262 3263 if isinstance(this, exp.Subqueryable): 3264 this = self._parse_set_operations( 3265 self._parse_subquery(this=this, parse_alias=False) 3266 ) 3267 elif len(expressions) > 1: 3268 this = self.expression(exp.Tuple, expressions=expressions) 3269 else: 3270 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3271 3272 if this: 3273 this.add_comments(comments) 3274 3275 self._match_r_paren(expression=this) 3276 return this 3277 3278 return None 3279 3280 def _parse_field( 3281 self, 3282 any_token: bool = False, 3283 tokens: t.Optional[t.Collection[TokenType]] = None, 3284 anonymous_func: bool = False, 3285 ) -> t.Optional[exp.Expression]: 3286 return ( 3287 self._parse_primary() 3288 or self._parse_function(anonymous=anonymous_func) 3289 or self._parse_id_var(any_token=any_token, tokens=tokens) 3290 ) 3291 3292 def _parse_function( 3293 self, 3294 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3295 anonymous: bool = False, 3296 optional_parens: bool = True, 3297 ) -> t.Optional[exp.Expression]: 3298 if not self._curr: 3299 return None 3300 3301 token_type = self._curr.token_type 3302 3303 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3304 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3305 3306 if not self._next or self._next.token_type != TokenType.L_PAREN: 3307 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3308 self._advance() 3309 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3310 3311 return None 3312 3313 if token_type not in self.FUNC_TOKENS: 3314 return None 3315 3316 this = self._curr.text 3317 upper = this.upper() 3318 self._advance(2) 3319 3320 parser = self.FUNCTION_PARSERS.get(upper) 3321 3322 if parser and not anonymous: 3323 this = parser(self) 3324 else: 3325 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3326 3327 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3328 this = self.expression(subquery_predicate, this=self._parse_select()) 3329 self._match_r_paren() 3330 return this 3331 3332 if functions is None: 3333 functions = self.FUNCTIONS 3334 3335 function = functions.get(upper) 3336 3337 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3338 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3339 3340 if function and not anonymous: 3341 this = self.validate_expression(function(args), args) 3342 else: 3343 this = self.expression(exp.Anonymous, this=this, expressions=args) 3344 3345 self._match(TokenType.R_PAREN, expression=this) 3346 return self._parse_window(this) 3347 3348 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3349 return self._parse_column_def(self._parse_id_var()) 3350 3351 def _parse_user_defined_function( 3352 self, kind: t.Optional[TokenType] = None 3353 ) -> t.Optional[exp.Expression]: 3354 this = self._parse_id_var() 3355 3356 while self._match(TokenType.DOT): 3357 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3358 3359 if not self._match(TokenType.L_PAREN): 3360 return this 3361 3362 expressions = self._parse_csv(self._parse_function_parameter) 3363 self._match_r_paren() 3364 return self.expression( 3365 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3366 ) 3367 3368 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3369 literal = self._parse_primary() 3370 if literal: 3371 return self.expression(exp.Introducer, this=token.text, expression=literal) 3372 3373 return self.expression(exp.Identifier, this=token.text) 3374 3375 def _parse_session_parameter(self) -> exp.SessionParameter: 3376 kind = None 3377 this = self._parse_id_var() or self._parse_primary() 3378 3379 if this and self._match(TokenType.DOT): 3380 kind = this.name 3381 this = self._parse_var() or self._parse_primary() 3382 3383 return self.expression(exp.SessionParameter, this=this, kind=kind) 3384 3385 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3386 index = self._index 3387 3388 if self._match(TokenType.L_PAREN): 3389 expressions = self._parse_csv(self._parse_id_var) 3390 3391 if not self._match(TokenType.R_PAREN): 3392 self._retreat(index) 3393 else: 3394 expressions = [self._parse_id_var()] 3395 3396 if self._match_set(self.LAMBDAS): 3397 return self.LAMBDAS[self._prev.token_type](self, expressions) 3398 3399 self._retreat(index) 3400 3401 this: t.Optional[exp.Expression] 3402 3403 if self._match(TokenType.DISTINCT): 3404 this = self.expression( 3405 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3406 ) 3407 else: 3408 this = self._parse_select_or_expression(alias=alias) 3409 3410 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3411 3412 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3413 index = self._index 3414 3415 if not self.errors: 3416 try: 3417 if self._parse_select(nested=True): 3418 return this 3419 except ParseError: 3420 pass 3421 finally: 3422 self.errors.clear() 3423 self._retreat(index) 3424 3425 if not self._match(TokenType.L_PAREN): 3426 return this 3427 3428 args = self._parse_csv( 3429 lambda: self._parse_constraint() 3430 or self._parse_column_def(self._parse_field(any_token=True)) 3431 ) 3432 3433 self._match_r_paren() 3434 return self.expression(exp.Schema, this=this, expressions=args) 3435 3436 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3437 # column defs are not really columns, they're identifiers 3438 if isinstance(this, exp.Column): 3439 this = this.this 3440 3441 kind = self._parse_types(schema=True) 3442 3443 if self._match_text_seq("FOR", "ORDINALITY"): 3444 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3445 3446 constraints = [] 3447 while True: 3448 constraint = self._parse_column_constraint() 3449 if not constraint: 3450 break 3451 constraints.append(constraint) 3452 3453 if not kind and not constraints: 3454 return this 3455 3456 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3457 3458 def _parse_auto_increment( 3459 self, 3460 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3461 start = None 3462 increment = None 3463 3464 if self._match(TokenType.L_PAREN, advance=False): 3465 args = self._parse_wrapped_csv(self._parse_bitwise) 3466 start = seq_get(args, 0) 3467 increment = seq_get(args, 1) 3468 elif self._match_text_seq("START"): 3469 start = self._parse_bitwise() 3470 self._match_text_seq("INCREMENT") 3471 increment = self._parse_bitwise() 3472 3473 if start and increment: 3474 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3475 3476 return exp.AutoIncrementColumnConstraint() 3477 3478 def _parse_compress(self) -> exp.CompressColumnConstraint: 3479 if self._match(TokenType.L_PAREN, advance=False): 3480 return self.expression( 3481 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3482 ) 3483 3484 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3485 3486 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3487 if self._match_text_seq("BY", "DEFAULT"): 3488 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3489 this = self.expression( 3490 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3491 ) 3492 else: 3493 self._match_text_seq("ALWAYS") 3494 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3495 3496 self._match(TokenType.ALIAS) 3497 identity = self._match_text_seq("IDENTITY") 3498 3499 if self._match(TokenType.L_PAREN): 3500 if self._match_text_seq("START", "WITH"): 3501 this.set("start", self._parse_bitwise()) 3502 if self._match_text_seq("INCREMENT", "BY"): 3503 this.set("increment", self._parse_bitwise()) 3504 if self._match_text_seq("MINVALUE"): 3505 this.set("minvalue", self._parse_bitwise()) 3506 if self._match_text_seq("MAXVALUE"): 3507 this.set("maxvalue", self._parse_bitwise()) 3508 3509 if self._match_text_seq("CYCLE"): 3510 this.set("cycle", True) 3511 elif self._match_text_seq("NO", "CYCLE"): 3512 this.set("cycle", False) 3513 3514 if not identity: 3515 this.set("expression", self._parse_bitwise()) 3516 3517 self._match_r_paren() 3518 3519 return this 3520 3521 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3522 self._match_text_seq("LENGTH") 3523 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3524 3525 def _parse_not_constraint( 3526 self, 3527 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3528 if self._match_text_seq("NULL"): 3529 return self.expression(exp.NotNullColumnConstraint) 3530 if self._match_text_seq("CASESPECIFIC"): 3531 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3532 return None 3533 3534 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3535 if self._match(TokenType.CONSTRAINT): 3536 this = self._parse_id_var() 3537 else: 3538 this = None 3539 3540 if self._match_texts(self.CONSTRAINT_PARSERS): 3541 return self.expression( 3542 exp.ColumnConstraint, 3543 this=this, 3544 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3545 ) 3546 3547 return this 3548 3549 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3550 if not self._match(TokenType.CONSTRAINT): 3551 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3552 3553 this = self._parse_id_var() 3554 expressions = [] 3555 3556 while True: 3557 constraint = self._parse_unnamed_constraint() or self._parse_function() 3558 if not constraint: 3559 break 3560 expressions.append(constraint) 3561 3562 return self.expression(exp.Constraint, this=this, expressions=expressions) 3563 3564 def _parse_unnamed_constraint( 3565 self, constraints: t.Optional[t.Collection[str]] = None 3566 ) -> t.Optional[exp.Expression]: 3567 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3568 return None 3569 3570 constraint = self._prev.text.upper() 3571 if constraint not in self.CONSTRAINT_PARSERS: 3572 self.raise_error(f"No parser found for schema constraint {constraint}.") 3573 3574 return self.CONSTRAINT_PARSERS[constraint](self) 3575 3576 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3577 self._match_text_seq("KEY") 3578 return self.expression( 3579 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3580 ) 3581 3582 def _parse_key_constraint_options(self) -> t.List[str]: 3583 options = [] 3584 while True: 3585 if not self._curr: 3586 break 3587 3588 if self._match(TokenType.ON): 3589 action = None 3590 on = self._advance_any() and self._prev.text 3591 3592 if self._match_text_seq("NO", "ACTION"): 3593 action = "NO ACTION" 3594 elif self._match_text_seq("CASCADE"): 3595 action = "CASCADE" 3596 elif self._match_pair(TokenType.SET, TokenType.NULL): 3597 action = "SET NULL" 3598 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3599 action = "SET DEFAULT" 3600 else: 3601 self.raise_error("Invalid key constraint") 3602 3603 options.append(f"ON {on} {action}") 3604 elif self._match_text_seq("NOT", "ENFORCED"): 3605 options.append("NOT ENFORCED") 3606 elif self._match_text_seq("DEFERRABLE"): 3607 options.append("DEFERRABLE") 3608 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3609 options.append("INITIALLY DEFERRED") 3610 elif self._match_text_seq("NORELY"): 3611 options.append("NORELY") 3612 elif self._match_text_seq("MATCH", "FULL"): 3613 options.append("MATCH FULL") 3614 else: 3615 break 3616 3617 return options 3618 3619 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3620 if match and not self._match(TokenType.REFERENCES): 3621 return None 3622 3623 expressions = None 3624 this = self._parse_table(schema=True) 3625 options = self._parse_key_constraint_options() 3626 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3627 3628 def _parse_foreign_key(self) -> exp.ForeignKey: 3629 expressions = self._parse_wrapped_id_vars() 3630 reference = self._parse_references() 3631 options = {} 3632 3633 while self._match(TokenType.ON): 3634 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3635 self.raise_error("Expected DELETE or UPDATE") 3636 3637 kind = self._prev.text.lower() 3638 3639 if self._match_text_seq("NO", "ACTION"): 3640 action = "NO ACTION" 3641 elif self._match(TokenType.SET): 3642 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3643 action = "SET " + self._prev.text.upper() 3644 else: 3645 self._advance() 3646 action = self._prev.text.upper() 3647 3648 options[kind] = action 3649 3650 return self.expression( 3651 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3652 ) 3653 3654 def _parse_primary_key( 3655 self, wrapped_optional: bool = False, in_props: bool = False 3656 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3657 desc = ( 3658 self._match_set((TokenType.ASC, TokenType.DESC)) 3659 and self._prev.token_type == TokenType.DESC 3660 ) 3661 3662 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3663 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3664 3665 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3666 options = self._parse_key_constraint_options() 3667 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3668 3669 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3670 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3671 return this 3672 3673 bracket_kind = self._prev.token_type 3674 3675 if self._match(TokenType.COLON): 3676 expressions: t.List[t.Optional[exp.Expression]] = [ 3677 self.expression(exp.Slice, expression=self._parse_conjunction()) 3678 ] 3679 else: 3680 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3681 3682 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3683 if bracket_kind == TokenType.L_BRACE: 3684 this = self.expression(exp.Struct, expressions=expressions) 3685 elif not this or this.name.upper() == "ARRAY": 3686 this = self.expression(exp.Array, expressions=expressions) 3687 else: 3688 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3689 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3690 3691 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3692 self.raise_error("Expected ]") 3693 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3694 self.raise_error("Expected }") 3695 3696 self._add_comments(this) 3697 return self._parse_bracket(this) 3698 3699 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3700 if self._match(TokenType.COLON): 3701 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3702 return this 3703 3704 def _parse_case(self) -> t.Optional[exp.Expression]: 3705 ifs = [] 3706 default = None 3707 3708 expression = self._parse_conjunction() 3709 3710 while self._match(TokenType.WHEN): 3711 this = self._parse_conjunction() 3712 self._match(TokenType.THEN) 3713 then = self._parse_conjunction() 3714 ifs.append(self.expression(exp.If, this=this, true=then)) 3715 3716 if self._match(TokenType.ELSE): 3717 default = self._parse_conjunction() 3718 3719 if not self._match(TokenType.END): 3720 self.raise_error("Expected END after CASE", self._prev) 3721 3722 return self._parse_window( 3723 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3724 ) 3725 3726 def _parse_if(self) -> t.Optional[exp.Expression]: 3727 if self._match(TokenType.L_PAREN): 3728 args = self._parse_csv(self._parse_conjunction) 3729 this = self.validate_expression(exp.If.from_arg_list(args), args) 3730 self._match_r_paren() 3731 else: 3732 index = self._index - 1 3733 condition = self._parse_conjunction() 3734 3735 if not condition: 3736 self._retreat(index) 3737 return None 3738 3739 self._match(TokenType.THEN) 3740 true = self._parse_conjunction() 3741 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3742 self._match(TokenType.END) 3743 this = self.expression(exp.If, this=condition, true=true, false=false) 3744 3745 return self._parse_window(this) 3746 3747 def _parse_extract(self) -> exp.Extract: 3748 this = self._parse_function() or self._parse_var() or self._parse_type() 3749 3750 if self._match(TokenType.FROM): 3751 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3752 3753 if not self._match(TokenType.COMMA): 3754 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3755 3756 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3757 3758 def _parse_any_value(self) -> exp.AnyValue: 3759 this = self._parse_lambda() 3760 is_max = None 3761 having = None 3762 3763 if self._match(TokenType.HAVING): 3764 self._match_texts(("MAX", "MIN")) 3765 is_max = self._prev.text == "MAX" 3766 having = self._parse_column() 3767 3768 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3769 3770 def _parse_cast(self, strict: bool) -> exp.Expression: 3771 this = self._parse_conjunction() 3772 3773 if not self._match(TokenType.ALIAS): 3774 if self._match(TokenType.COMMA): 3775 return self.expression( 3776 exp.CastToStrType, this=this, expression=self._parse_string() 3777 ) 3778 else: 3779 self.raise_error("Expected AS after CAST") 3780 3781 fmt = None 3782 to = self._parse_types() 3783 3784 if not to: 3785 self.raise_error("Expected TYPE after CAST") 3786 elif to.this == exp.DataType.Type.CHAR: 3787 if self._match(TokenType.CHARACTER_SET): 3788 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3789 elif self._match(TokenType.FORMAT): 3790 fmt_string = self._parse_string() 3791 fmt = self._parse_at_time_zone(fmt_string) 3792 3793 if to.this in exp.DataType.TEMPORAL_TYPES: 3794 this = self.expression( 3795 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3796 this=this, 3797 format=exp.Literal.string( 3798 format_time( 3799 fmt_string.this if fmt_string else "", 3800 self.FORMAT_MAPPING or self.TIME_MAPPING, 3801 self.FORMAT_TRIE or self.TIME_TRIE, 3802 ) 3803 ), 3804 ) 3805 3806 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3807 this.set("zone", fmt.args["zone"]) 3808 3809 return this 3810 3811 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3812 3813 def _parse_concat(self) -> t.Optional[exp.Expression]: 3814 args = self._parse_csv(self._parse_conjunction) 3815 if self.CONCAT_NULL_OUTPUTS_STRING: 3816 args = [ 3817 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3818 for arg in args 3819 if arg 3820 ] 3821 3822 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3823 # we find such a call we replace it with its argument. 3824 if len(args) == 1: 3825 return args[0] 3826 3827 return self.expression( 3828 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3829 ) 3830 3831 def _parse_string_agg(self) -> exp.Expression: 3832 if self._match(TokenType.DISTINCT): 3833 args: t.List[t.Optional[exp.Expression]] = [ 3834 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3835 ] 3836 if self._match(TokenType.COMMA): 3837 args.extend(self._parse_csv(self._parse_conjunction)) 3838 else: 3839 args = self._parse_csv(self._parse_conjunction) 3840 3841 index = self._index 3842 if not self._match(TokenType.R_PAREN): 3843 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3844 return self.expression( 3845 exp.GroupConcat, 3846 this=seq_get(args, 0), 3847 separator=self._parse_order(this=seq_get(args, 1)), 3848 ) 3849 3850 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3851 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3852 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3853 if not self._match_text_seq("WITHIN", "GROUP"): 3854 self._retreat(index) 3855 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3856 3857 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3858 order = self._parse_order(this=seq_get(args, 0)) 3859 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3860 3861 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3862 this = self._parse_bitwise() 3863 3864 if self._match(TokenType.USING): 3865 to: t.Optional[exp.Expression] = self.expression( 3866 exp.CharacterSet, this=self._parse_var() 3867 ) 3868 elif self._match(TokenType.COMMA): 3869 to = self._parse_types() 3870 else: 3871 to = None 3872 3873 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3874 3875 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3876 """ 3877 There are generally two variants of the DECODE function: 3878 3879 - DECODE(bin, charset) 3880 - DECODE(expression, search, result [, search, result] ... [, default]) 3881 3882 The second variant will always be parsed into a CASE expression. Note that NULL 3883 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3884 instead of relying on pattern matching. 3885 """ 3886 args = self._parse_csv(self._parse_conjunction) 3887 3888 if len(args) < 3: 3889 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3890 3891 expression, *expressions = args 3892 if not expression: 3893 return None 3894 3895 ifs = [] 3896 for search, result in zip(expressions[::2], expressions[1::2]): 3897 if not search or not result: 3898 return None 3899 3900 if isinstance(search, exp.Literal): 3901 ifs.append( 3902 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3903 ) 3904 elif isinstance(search, exp.Null): 3905 ifs.append( 3906 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3907 ) 3908 else: 3909 cond = exp.or_( 3910 exp.EQ(this=expression.copy(), expression=search), 3911 exp.and_( 3912 exp.Is(this=expression.copy(), expression=exp.Null()), 3913 exp.Is(this=search.copy(), expression=exp.Null()), 3914 copy=False, 3915 ), 3916 copy=False, 3917 ) 3918 ifs.append(exp.If(this=cond, true=result)) 3919 3920 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3921 3922 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3923 self._match_text_seq("KEY") 3924 key = self._parse_field() 3925 self._match(TokenType.COLON) 3926 self._match_text_seq("VALUE") 3927 value = self._parse_field() 3928 3929 if not key and not value: 3930 return None 3931 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3932 3933 def _parse_json_object(self) -> exp.JSONObject: 3934 star = self._parse_star() 3935 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3936 3937 null_handling = None 3938 if self._match_text_seq("NULL", "ON", "NULL"): 3939 null_handling = "NULL ON NULL" 3940 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3941 null_handling = "ABSENT ON NULL" 3942 3943 unique_keys = None 3944 if self._match_text_seq("WITH", "UNIQUE"): 3945 unique_keys = True 3946 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3947 unique_keys = False 3948 3949 self._match_text_seq("KEYS") 3950 3951 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3952 format_json = self._match_text_seq("FORMAT", "JSON") 3953 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3954 3955 return self.expression( 3956 exp.JSONObject, 3957 expressions=expressions, 3958 null_handling=null_handling, 3959 unique_keys=unique_keys, 3960 return_type=return_type, 3961 format_json=format_json, 3962 encoding=encoding, 3963 ) 3964 3965 def _parse_logarithm(self) -> exp.Func: 3966 # Default argument order is base, expression 3967 args = self._parse_csv(self._parse_range) 3968 3969 if len(args) > 1: 3970 if not self.LOG_BASE_FIRST: 3971 args.reverse() 3972 return exp.Log.from_arg_list(args) 3973 3974 return self.expression( 3975 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3976 ) 3977 3978 def _parse_match_against(self) -> exp.MatchAgainst: 3979 expressions = self._parse_csv(self._parse_column) 3980 3981 self._match_text_seq(")", "AGAINST", "(") 3982 3983 this = self._parse_string() 3984 3985 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3986 modifier = "IN NATURAL LANGUAGE MODE" 3987 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3988 modifier = f"{modifier} WITH QUERY EXPANSION" 3989 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3990 modifier = "IN BOOLEAN MODE" 3991 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3992 modifier = "WITH QUERY EXPANSION" 3993 else: 3994 modifier = None 3995 3996 return self.expression( 3997 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3998 ) 3999 4000 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4001 def _parse_open_json(self) -> exp.OpenJSON: 4002 this = self._parse_bitwise() 4003 path = self._match(TokenType.COMMA) and self._parse_string() 4004 4005 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4006 this = self._parse_field(any_token=True) 4007 kind = self._parse_types() 4008 path = self._parse_string() 4009 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4010 4011 return self.expression( 4012 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4013 ) 4014 4015 expressions = None 4016 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4017 self._match_l_paren() 4018 expressions = self._parse_csv(_parse_open_json_column_def) 4019 4020 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4021 4022 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4023 args = self._parse_csv(self._parse_bitwise) 4024 4025 if self._match(TokenType.IN): 4026 return self.expression( 4027 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4028 ) 4029 4030 if haystack_first: 4031 haystack = seq_get(args, 0) 4032 needle = seq_get(args, 1) 4033 else: 4034 needle = seq_get(args, 0) 4035 haystack = seq_get(args, 1) 4036 4037 return self.expression( 4038 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4039 ) 4040 4041 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4042 args = self._parse_csv(self._parse_table) 4043 return exp.JoinHint(this=func_name.upper(), expressions=args) 4044 4045 def _parse_substring(self) -> exp.Substring: 4046 # Postgres supports the form: substring(string [from int] [for int]) 4047 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4048 4049 args = self._parse_csv(self._parse_bitwise) 4050 4051 if self._match(TokenType.FROM): 4052 args.append(self._parse_bitwise()) 4053 if self._match(TokenType.FOR): 4054 args.append(self._parse_bitwise()) 4055 4056 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4057 4058 def _parse_trim(self) -> exp.Trim: 4059 # https://www.w3resource.com/sql/character-functions/trim.php 4060 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4061 4062 position = None 4063 collation = None 4064 4065 if self._match_texts(self.TRIM_TYPES): 4066 position = self._prev.text.upper() 4067 4068 expression = self._parse_bitwise() 4069 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4070 this = self._parse_bitwise() 4071 else: 4072 this = expression 4073 expression = None 4074 4075 if self._match(TokenType.COLLATE): 4076 collation = self._parse_bitwise() 4077 4078 return self.expression( 4079 exp.Trim, this=this, position=position, expression=expression, collation=collation 4080 ) 4081 4082 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4083 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4084 4085 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4086 return self._parse_window(self._parse_id_var(), alias=True) 4087 4088 def _parse_respect_or_ignore_nulls( 4089 self, this: t.Optional[exp.Expression] 4090 ) -> t.Optional[exp.Expression]: 4091 if self._match_text_seq("IGNORE", "NULLS"): 4092 return self.expression(exp.IgnoreNulls, this=this) 4093 if self._match_text_seq("RESPECT", "NULLS"): 4094 return self.expression(exp.RespectNulls, this=this) 4095 return this 4096 4097 def _parse_window( 4098 self, this: t.Optional[exp.Expression], alias: bool = False 4099 ) -> t.Optional[exp.Expression]: 4100 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4101 self._match(TokenType.WHERE) 4102 this = self.expression( 4103 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4104 ) 4105 self._match_r_paren() 4106 4107 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4108 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4109 if self._match_text_seq("WITHIN", "GROUP"): 4110 order = self._parse_wrapped(self._parse_order) 4111 this = self.expression(exp.WithinGroup, this=this, expression=order) 4112 4113 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4114 # Some dialects choose to implement and some do not. 4115 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4116 4117 # There is some code above in _parse_lambda that handles 4118 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4119 4120 # The below changes handle 4121 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4122 4123 # Oracle allows both formats 4124 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4125 # and Snowflake chose to do the same for familiarity 4126 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4127 this = self._parse_respect_or_ignore_nulls(this) 4128 4129 # bigquery select from window x AS (partition by ...) 4130 if alias: 4131 over = None 4132 self._match(TokenType.ALIAS) 4133 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4134 return this 4135 else: 4136 over = self._prev.text.upper() 4137 4138 if not self._match(TokenType.L_PAREN): 4139 return self.expression( 4140 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4141 ) 4142 4143 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4144 4145 first = self._match(TokenType.FIRST) 4146 if self._match_text_seq("LAST"): 4147 first = False 4148 4149 partition = self._parse_partition_by() 4150 order = self._parse_order() 4151 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4152 4153 if kind: 4154 self._match(TokenType.BETWEEN) 4155 start = self._parse_window_spec() 4156 self._match(TokenType.AND) 4157 end = self._parse_window_spec() 4158 4159 spec = self.expression( 4160 exp.WindowSpec, 4161 kind=kind, 4162 start=start["value"], 4163 start_side=start["side"], 4164 end=end["value"], 4165 end_side=end["side"], 4166 ) 4167 else: 4168 spec = None 4169 4170 self._match_r_paren() 4171 4172 return self.expression( 4173 exp.Window, 4174 this=this, 4175 partition_by=partition, 4176 order=order, 4177 spec=spec, 4178 alias=window_alias, 4179 over=over, 4180 first=first, 4181 ) 4182 4183 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4184 self._match(TokenType.BETWEEN) 4185 4186 return { 4187 "value": ( 4188 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4189 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4190 or self._parse_bitwise() 4191 ), 4192 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4193 } 4194 4195 def _parse_alias( 4196 self, this: t.Optional[exp.Expression], explicit: bool = False 4197 ) -> t.Optional[exp.Expression]: 4198 any_token = self._match(TokenType.ALIAS) 4199 4200 if explicit and not any_token: 4201 return this 4202 4203 if self._match(TokenType.L_PAREN): 4204 aliases = self.expression( 4205 exp.Aliases, 4206 this=this, 4207 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4208 ) 4209 self._match_r_paren(aliases) 4210 return aliases 4211 4212 alias = self._parse_id_var(any_token) 4213 4214 if alias: 4215 return self.expression(exp.Alias, this=this, alias=alias) 4216 4217 return this 4218 4219 def _parse_id_var( 4220 self, 4221 any_token: bool = True, 4222 tokens: t.Optional[t.Collection[TokenType]] = None, 4223 ) -> t.Optional[exp.Expression]: 4224 identifier = self._parse_identifier() 4225 4226 if identifier: 4227 return identifier 4228 4229 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4230 quoted = self._prev.token_type == TokenType.STRING 4231 return exp.Identifier(this=self._prev.text, quoted=quoted) 4232 4233 return None 4234 4235 def _parse_string(self) -> t.Optional[exp.Expression]: 4236 if self._match(TokenType.STRING): 4237 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4238 return self._parse_placeholder() 4239 4240 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4241 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4242 4243 def _parse_number(self) -> t.Optional[exp.Expression]: 4244 if self._match(TokenType.NUMBER): 4245 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4246 return self._parse_placeholder() 4247 4248 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4249 if self._match(TokenType.IDENTIFIER): 4250 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4251 return self._parse_placeholder() 4252 4253 def _parse_var( 4254 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4255 ) -> t.Optional[exp.Expression]: 4256 if ( 4257 (any_token and self._advance_any()) 4258 or self._match(TokenType.VAR) 4259 or (self._match_set(tokens) if tokens else False) 4260 ): 4261 return self.expression(exp.Var, this=self._prev.text) 4262 return self._parse_placeholder() 4263 4264 def _advance_any(self) -> t.Optional[Token]: 4265 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4266 self._advance() 4267 return self._prev 4268 return None 4269 4270 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4271 return self._parse_var() or self._parse_string() 4272 4273 def _parse_null(self) -> t.Optional[exp.Expression]: 4274 if self._match(TokenType.NULL): 4275 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4276 return None 4277 4278 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4279 if self._match(TokenType.TRUE): 4280 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4281 if self._match(TokenType.FALSE): 4282 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4283 return None 4284 4285 def _parse_star(self) -> t.Optional[exp.Expression]: 4286 if self._match(TokenType.STAR): 4287 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4288 return None 4289 4290 def _parse_parameter(self) -> exp.Parameter: 4291 wrapped = self._match(TokenType.L_BRACE) 4292 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4293 self._match(TokenType.R_BRACE) 4294 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4295 4296 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4297 if self._match_set(self.PLACEHOLDER_PARSERS): 4298 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4299 if placeholder: 4300 return placeholder 4301 self._advance(-1) 4302 return None 4303 4304 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4305 if not self._match(TokenType.EXCEPT): 4306 return None 4307 if self._match(TokenType.L_PAREN, advance=False): 4308 return self._parse_wrapped_csv(self._parse_column) 4309 return self._parse_csv(self._parse_column) 4310 4311 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4312 if not self._match(TokenType.REPLACE): 4313 return None 4314 if self._match(TokenType.L_PAREN, advance=False): 4315 return self._parse_wrapped_csv(self._parse_expression) 4316 return self._parse_expressions() 4317 4318 def _parse_csv( 4319 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4320 ) -> t.List[t.Optional[exp.Expression]]: 4321 parse_result = parse_method() 4322 items = [parse_result] if parse_result is not None else [] 4323 4324 while self._match(sep): 4325 self._add_comments(parse_result) 4326 parse_result = parse_method() 4327 if parse_result is not None: 4328 items.append(parse_result) 4329 4330 return items 4331 4332 def _parse_tokens( 4333 self, parse_method: t.Callable, expressions: t.Dict 4334 ) -> t.Optional[exp.Expression]: 4335 this = parse_method() 4336 4337 while self._match_set(expressions): 4338 this = self.expression( 4339 expressions[self._prev.token_type], 4340 this=this, 4341 comments=self._prev_comments, 4342 expression=parse_method(), 4343 ) 4344 4345 return this 4346 4347 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4348 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4349 4350 def _parse_wrapped_csv( 4351 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4352 ) -> t.List[t.Optional[exp.Expression]]: 4353 return self._parse_wrapped( 4354 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4355 ) 4356 4357 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4358 wrapped = self._match(TokenType.L_PAREN) 4359 if not wrapped and not optional: 4360 self.raise_error("Expecting (") 4361 parse_result = parse_method() 4362 if wrapped: 4363 self._match_r_paren() 4364 return parse_result 4365 4366 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4367 return self._parse_csv(self._parse_expression) 4368 4369 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4370 return self._parse_select() or self._parse_set_operations( 4371 self._parse_expression() if alias else self._parse_conjunction() 4372 ) 4373 4374 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4375 return self._parse_query_modifiers( 4376 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4377 ) 4378 4379 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4380 this = None 4381 if self._match_texts(self.TRANSACTION_KIND): 4382 this = self._prev.text 4383 4384 self._match_texts({"TRANSACTION", "WORK"}) 4385 4386 modes = [] 4387 while True: 4388 mode = [] 4389 while self._match(TokenType.VAR): 4390 mode.append(self._prev.text) 4391 4392 if mode: 4393 modes.append(" ".join(mode)) 4394 if not self._match(TokenType.COMMA): 4395 break 4396 4397 return self.expression(exp.Transaction, this=this, modes=modes) 4398 4399 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4400 chain = None 4401 savepoint = None 4402 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4403 4404 self._match_texts({"TRANSACTION", "WORK"}) 4405 4406 if self._match_text_seq("TO"): 4407 self._match_text_seq("SAVEPOINT") 4408 savepoint = self._parse_id_var() 4409 4410 if self._match(TokenType.AND): 4411 chain = not self._match_text_seq("NO") 4412 self._match_text_seq("CHAIN") 4413 4414 if is_rollback: 4415 return self.expression(exp.Rollback, savepoint=savepoint) 4416 4417 return self.expression(exp.Commit, chain=chain) 4418 4419 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4420 if not self._match_text_seq("ADD"): 4421 return None 4422 4423 self._match(TokenType.COLUMN) 4424 exists_column = self._parse_exists(not_=True) 4425 expression = self._parse_column_def(self._parse_field(any_token=True)) 4426 4427 if expression: 4428 expression.set("exists", exists_column) 4429 4430 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4431 if self._match_texts(("FIRST", "AFTER")): 4432 position = self._prev.text 4433 column_position = self.expression( 4434 exp.ColumnPosition, this=self._parse_column(), position=position 4435 ) 4436 expression.set("position", column_position) 4437 4438 return expression 4439 4440 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4441 drop = self._match(TokenType.DROP) and self._parse_drop() 4442 if drop and not isinstance(drop, exp.Command): 4443 drop.set("kind", drop.args.get("kind", "COLUMN")) 4444 return drop 4445 4446 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4447 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4448 return self.expression( 4449 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4450 ) 4451 4452 def _parse_add_constraint(self) -> exp.AddConstraint: 4453 this = None 4454 kind = self._prev.token_type 4455 4456 if kind == TokenType.CONSTRAINT: 4457 this = self._parse_id_var() 4458 4459 if self._match_text_seq("CHECK"): 4460 expression = self._parse_wrapped(self._parse_conjunction) 4461 enforced = self._match_text_seq("ENFORCED") 4462 4463 return self.expression( 4464 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4465 ) 4466 4467 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4468 expression = self._parse_foreign_key() 4469 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4470 expression = self._parse_primary_key() 4471 else: 4472 expression = None 4473 4474 return self.expression(exp.AddConstraint, this=this, expression=expression) 4475 4476 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4477 index = self._index - 1 4478 4479 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4480 return self._parse_csv(self._parse_add_constraint) 4481 4482 self._retreat(index) 4483 return self._parse_csv(self._parse_add_column) 4484 4485 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4486 self._match(TokenType.COLUMN) 4487 column = self._parse_field(any_token=True) 4488 4489 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4490 return self.expression(exp.AlterColumn, this=column, drop=True) 4491 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4492 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4493 4494 self._match_text_seq("SET", "DATA") 4495 return self.expression( 4496 exp.AlterColumn, 4497 this=column, 4498 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4499 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4500 using=self._match(TokenType.USING) and self._parse_conjunction(), 4501 ) 4502 4503 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4504 index = self._index - 1 4505 4506 partition_exists = self._parse_exists() 4507 if self._match(TokenType.PARTITION, advance=False): 4508 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4509 4510 self._retreat(index) 4511 return self._parse_csv(self._parse_drop_column) 4512 4513 def _parse_alter_table_rename(self) -> exp.RenameTable: 4514 self._match_text_seq("TO") 4515 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4516 4517 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4518 start = self._prev 4519 4520 if not self._match(TokenType.TABLE): 4521 return self._parse_as_command(start) 4522 4523 exists = self._parse_exists() 4524 this = self._parse_table(schema=True) 4525 4526 if self._next: 4527 self._advance() 4528 4529 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4530 if parser: 4531 actions = ensure_list(parser(self)) 4532 4533 if not self._curr: 4534 return self.expression( 4535 exp.AlterTable, 4536 this=this, 4537 exists=exists, 4538 actions=actions, 4539 ) 4540 return self._parse_as_command(start) 4541 4542 def _parse_merge(self) -> exp.Merge: 4543 self._match(TokenType.INTO) 4544 target = self._parse_table() 4545 4546 self._match(TokenType.USING) 4547 using = self._parse_table() 4548 4549 self._match(TokenType.ON) 4550 on = self._parse_conjunction() 4551 4552 whens = [] 4553 while self._match(TokenType.WHEN): 4554 matched = not self._match(TokenType.NOT) 4555 self._match_text_seq("MATCHED") 4556 source = ( 4557 False 4558 if self._match_text_seq("BY", "TARGET") 4559 else self._match_text_seq("BY", "SOURCE") 4560 ) 4561 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4562 4563 self._match(TokenType.THEN) 4564 4565 if self._match(TokenType.INSERT): 4566 _this = self._parse_star() 4567 if _this: 4568 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4569 else: 4570 then = self.expression( 4571 exp.Insert, 4572 this=self._parse_value(), 4573 expression=self._match(TokenType.VALUES) and self._parse_value(), 4574 ) 4575 elif self._match(TokenType.UPDATE): 4576 expressions = self._parse_star() 4577 if expressions: 4578 then = self.expression(exp.Update, expressions=expressions) 4579 else: 4580 then = self.expression( 4581 exp.Update, 4582 expressions=self._match(TokenType.SET) 4583 and self._parse_csv(self._parse_equality), 4584 ) 4585 elif self._match(TokenType.DELETE): 4586 then = self.expression(exp.Var, this=self._prev.text) 4587 else: 4588 then = None 4589 4590 whens.append( 4591 self.expression( 4592 exp.When, 4593 matched=matched, 4594 source=source, 4595 condition=condition, 4596 then=then, 4597 ) 4598 ) 4599 4600 return self.expression( 4601 exp.Merge, 4602 this=target, 4603 using=using, 4604 on=on, 4605 expressions=whens, 4606 ) 4607 4608 def _parse_show(self) -> t.Optional[exp.Expression]: 4609 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4610 if parser: 4611 return parser(self) 4612 self._advance() 4613 return self.expression(exp.Show, this=self._prev.text.upper()) 4614 4615 def _parse_set_item_assignment( 4616 self, kind: t.Optional[str] = None 4617 ) -> t.Optional[exp.Expression]: 4618 index = self._index 4619 4620 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4621 return self._parse_set_transaction(global_=kind == "GLOBAL") 4622 4623 left = self._parse_primary() or self._parse_id_var() 4624 4625 if not self._match_texts(("=", "TO")): 4626 self._retreat(index) 4627 return None 4628 4629 right = self._parse_statement() or self._parse_id_var() 4630 this = self.expression(exp.EQ, this=left, expression=right) 4631 4632 return self.expression(exp.SetItem, this=this, kind=kind) 4633 4634 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4635 self._match_text_seq("TRANSACTION") 4636 characteristics = self._parse_csv( 4637 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4638 ) 4639 return self.expression( 4640 exp.SetItem, 4641 expressions=characteristics, 4642 kind="TRANSACTION", 4643 **{"global": global_}, # type: ignore 4644 ) 4645 4646 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4647 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4648 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4649 4650 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4651 index = self._index 4652 set_ = self.expression( 4653 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4654 ) 4655 4656 if self._curr: 4657 self._retreat(index) 4658 return self._parse_as_command(self._prev) 4659 4660 return set_ 4661 4662 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4663 for option in options: 4664 if self._match_text_seq(*option.split(" ")): 4665 return exp.var(option) 4666 return None 4667 4668 def _parse_as_command(self, start: Token) -> exp.Command: 4669 while self._curr: 4670 self._advance() 4671 text = self._find_sql(start, self._prev) 4672 size = len(start.text) 4673 return exp.Command(this=text[:size], expression=text[size:]) 4674 4675 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4676 settings = [] 4677 4678 self._match_l_paren() 4679 kind = self._parse_id_var() 4680 4681 if self._match(TokenType.L_PAREN): 4682 while True: 4683 key = self._parse_id_var() 4684 value = self._parse_primary() 4685 4686 if not key and value is None: 4687 break 4688 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4689 self._match(TokenType.R_PAREN) 4690 4691 self._match_r_paren() 4692 4693 return self.expression( 4694 exp.DictProperty, 4695 this=this, 4696 kind=kind.this if kind else None, 4697 settings=settings, 4698 ) 4699 4700 def _parse_dict_range(self, this: str) -> exp.DictRange: 4701 self._match_l_paren() 4702 has_min = self._match_text_seq("MIN") 4703 if has_min: 4704 min = self._parse_var() or self._parse_primary() 4705 self._match_text_seq("MAX") 4706 max = self._parse_var() or self._parse_primary() 4707 else: 4708 max = self._parse_var() or self._parse_primary() 4709 min = exp.Literal.number(0) 4710 self._match_r_paren() 4711 return self.expression(exp.DictRange, this=this, min=min, max=max) 4712 4713 def _find_parser( 4714 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4715 ) -> t.Optional[t.Callable]: 4716 if not self._curr: 4717 return None 4718 4719 index = self._index 4720 this = [] 4721 while True: 4722 # The current token might be multiple words 4723 curr = self._curr.text.upper() 4724 key = curr.split(" ") 4725 this.append(curr) 4726 4727 self._advance() 4728 result, trie = in_trie(trie, key) 4729 if result == TrieResult.FAILED: 4730 break 4731 4732 if result == TrieResult.EXISTS: 4733 subparser = parsers[" ".join(this)] 4734 return subparser 4735 4736 self._retreat(index) 4737 return None 4738 4739 def _match(self, token_type, advance=True, expression=None): 4740 if not self._curr: 4741 return None 4742 4743 if self._curr.token_type == token_type: 4744 if advance: 4745 self._advance() 4746 self._add_comments(expression) 4747 return True 4748 4749 return None 4750 4751 def _match_set(self, types, advance=True): 4752 if not self._curr: 4753 return None 4754 4755 if self._curr.token_type in types: 4756 if advance: 4757 self._advance() 4758 return True 4759 4760 return None 4761 4762 def _match_pair(self, token_type_a, token_type_b, advance=True): 4763 if not self._curr or not self._next: 4764 return None 4765 4766 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4767 if advance: 4768 self._advance(2) 4769 return True 4770 4771 return None 4772 4773 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4774 if not self._match(TokenType.L_PAREN, expression=expression): 4775 self.raise_error("Expecting (") 4776 4777 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4778 if not self._match(TokenType.R_PAREN, expression=expression): 4779 self.raise_error("Expecting )") 4780 4781 def _match_texts(self, texts, advance=True): 4782 if self._curr and self._curr.text.upper() in texts: 4783 if advance: 4784 self._advance() 4785 return True 4786 return False 4787 4788 def _match_text_seq(self, *texts, advance=True): 4789 index = self._index 4790 for text in texts: 4791 if self._curr and self._curr.text.upper() == text: 4792 self._advance() 4793 else: 4794 self._retreat(index) 4795 return False 4796 4797 if not advance: 4798 self._retreat(index) 4799 4800 return True 4801 4802 @t.overload 4803 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4804 ... 4805 4806 @t.overload 4807 def _replace_columns_with_dots( 4808 self, this: t.Optional[exp.Expression] 4809 ) -> t.Optional[exp.Expression]: 4810 ... 4811 4812 def _replace_columns_with_dots(self, this): 4813 if isinstance(this, exp.Dot): 4814 exp.replace_children(this, self._replace_columns_with_dots) 4815 elif isinstance(this, exp.Column): 4816 exp.replace_children(this, self._replace_columns_with_dots) 4817 table = this.args.get("table") 4818 this = ( 4819 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4820 ) 4821 4822 return this 4823 4824 def _replace_lambda( 4825 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4826 ) -> t.Optional[exp.Expression]: 4827 if not node: 4828 return node 4829 4830 for column in node.find_all(exp.Column): 4831 if column.parts[0].name in lambda_variables: 4832 dot_or_id = column.to_dot() if column.table else column.this 4833 parent = column.parent 4834 4835 while isinstance(parent, exp.Dot): 4836 if not isinstance(parent.parent, exp.Dot): 4837 parent.replace(dot_or_id) 4838 break 4839 parent = parent.parent 4840 else: 4841 if column is node: 4842 node = dot_or_id 4843 else: 4844 column.replace(dot_or_id) 4845 return node
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.RLIKE, 332 TokenType.ROW, 333 TokenType.UNNEST, 334 TokenType.VAR, 335 TokenType.LEFT, 336 TokenType.RIGHT, 337 TokenType.DATE, 338 TokenType.DATETIME, 339 TokenType.TABLE, 340 TokenType.TIMESTAMP, 341 TokenType.TIMESTAMPTZ, 342 TokenType.WINDOW, 343 TokenType.XOR, 344 *TYPE_TOKENS, 345 *SUBQUERY_PREDICATES, 346 } 347 348 CONJUNCTION = { 349 TokenType.AND: exp.And, 350 TokenType.OR: exp.Or, 351 } 352 353 EQUALITY = { 354 TokenType.EQ: exp.EQ, 355 TokenType.NEQ: exp.NEQ, 356 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 357 } 358 359 COMPARISON = { 360 TokenType.GT: exp.GT, 361 TokenType.GTE: exp.GTE, 362 TokenType.LT: exp.LT, 363 TokenType.LTE: exp.LTE, 364 } 365 366 BITWISE = { 367 TokenType.AMP: exp.BitwiseAnd, 368 TokenType.CARET: exp.BitwiseXor, 369 TokenType.PIPE: exp.BitwiseOr, 370 TokenType.DPIPE: exp.DPipe, 371 } 372 373 TERM = { 374 TokenType.DASH: exp.Sub, 375 TokenType.PLUS: exp.Add, 376 TokenType.MOD: exp.Mod, 377 TokenType.COLLATE: exp.Collate, 378 } 379 380 FACTOR = { 381 TokenType.DIV: exp.IntDiv, 382 TokenType.LR_ARROW: exp.Distance, 383 TokenType.SLASH: exp.Div, 384 TokenType.STAR: exp.Mul, 385 } 386 387 TIMESTAMPS = { 388 TokenType.TIME, 389 TokenType.TIMESTAMP, 390 TokenType.TIMESTAMPTZ, 391 TokenType.TIMESTAMPLTZ, 392 } 393 394 SET_OPERATIONS = { 395 TokenType.UNION, 396 TokenType.INTERSECT, 397 TokenType.EXCEPT, 398 } 399 400 JOIN_METHODS = { 401 TokenType.NATURAL, 402 TokenType.ASOF, 403 } 404 405 JOIN_SIDES = { 406 TokenType.LEFT, 407 TokenType.RIGHT, 408 TokenType.FULL, 409 } 410 411 JOIN_KINDS = { 412 TokenType.INNER, 413 TokenType.OUTER, 414 TokenType.CROSS, 415 TokenType.SEMI, 416 TokenType.ANTI, 417 } 418 419 JOIN_HINTS: t.Set[str] = set() 420 421 LAMBDAS = { 422 TokenType.ARROW: lambda self, expressions: self.expression( 423 exp.Lambda, 424 this=self._replace_lambda( 425 self._parse_conjunction(), 426 {node.name for node in expressions}, 427 ), 428 expressions=expressions, 429 ), 430 TokenType.FARROW: lambda self, expressions: self.expression( 431 exp.Kwarg, 432 this=exp.var(expressions[0].name), 433 expression=self._parse_conjunction(), 434 ), 435 } 436 437 COLUMN_OPERATORS = { 438 TokenType.DOT: None, 439 TokenType.DCOLON: lambda self, this, to: self.expression( 440 exp.Cast if self.STRICT_CAST else exp.TryCast, 441 this=this, 442 to=to, 443 ), 444 TokenType.ARROW: lambda self, this, path: self.expression( 445 exp.JSONExtract, 446 this=this, 447 expression=path, 448 ), 449 TokenType.DARROW: lambda self, this, path: self.expression( 450 exp.JSONExtractScalar, 451 this=this, 452 expression=path, 453 ), 454 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 455 exp.JSONBExtract, 456 this=this, 457 expression=path, 458 ), 459 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 460 exp.JSONBExtractScalar, 461 this=this, 462 expression=path, 463 ), 464 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 465 exp.JSONBContains, 466 this=this, 467 expression=key, 468 ), 469 } 470 471 EXPRESSION_PARSERS = { 472 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 473 exp.Column: lambda self: self._parse_column(), 474 exp.Condition: lambda self: self._parse_conjunction(), 475 exp.DataType: lambda self: self._parse_types(), 476 exp.Expression: lambda self: self._parse_statement(), 477 exp.From: lambda self: self._parse_from(), 478 exp.Group: lambda self: self._parse_group(), 479 exp.Having: lambda self: self._parse_having(), 480 exp.Identifier: lambda self: self._parse_id_var(), 481 exp.Join: lambda self: self._parse_join(), 482 exp.Lambda: lambda self: self._parse_lambda(), 483 exp.Lateral: lambda self: self._parse_lateral(), 484 exp.Limit: lambda self: self._parse_limit(), 485 exp.Offset: lambda self: self._parse_offset(), 486 exp.Order: lambda self: self._parse_order(), 487 exp.Ordered: lambda self: self._parse_ordered(), 488 exp.Properties: lambda self: self._parse_properties(), 489 exp.Qualify: lambda self: self._parse_qualify(), 490 exp.Returning: lambda self: self._parse_returning(), 491 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 492 exp.Table: lambda self: self._parse_table_parts(), 493 exp.TableAlias: lambda self: self._parse_table_alias(), 494 exp.Where: lambda self: self._parse_where(), 495 exp.Window: lambda self: self._parse_named_window(), 496 exp.With: lambda self: self._parse_with(), 497 "JOIN_TYPE": lambda self: self._parse_join_parts(), 498 } 499 500 STATEMENT_PARSERS = { 501 TokenType.ALTER: lambda self: self._parse_alter(), 502 TokenType.BEGIN: lambda self: self._parse_transaction(), 503 TokenType.CACHE: lambda self: self._parse_cache(), 504 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 505 TokenType.COMMENT: lambda self: self._parse_comment(), 506 TokenType.CREATE: lambda self: self._parse_create(), 507 TokenType.DELETE: lambda self: self._parse_delete(), 508 TokenType.DESC: lambda self: self._parse_describe(), 509 TokenType.DESCRIBE: lambda self: self._parse_describe(), 510 TokenType.DROP: lambda self: self._parse_drop(), 511 TokenType.FROM: lambda self: exp.select("*").from_( 512 t.cast(exp.From, self._parse_from(skip_from_token=True)) 513 ), 514 TokenType.INSERT: lambda self: self._parse_insert(), 515 TokenType.LOAD: lambda self: self._parse_load(), 516 TokenType.MERGE: lambda self: self._parse_merge(), 517 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 518 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 519 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 520 TokenType.SET: lambda self: self._parse_set(), 521 TokenType.UNCACHE: lambda self: self._parse_uncache(), 522 TokenType.UPDATE: lambda self: self._parse_update(), 523 TokenType.USE: lambda self: self.expression( 524 exp.Use, 525 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 526 and exp.var(self._prev.text), 527 this=self._parse_table(schema=False), 528 ), 529 } 530 531 UNARY_PARSERS = { 532 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 533 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 534 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 535 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 536 } 537 538 PRIMARY_PARSERS = { 539 TokenType.STRING: lambda self, token: self.expression( 540 exp.Literal, this=token.text, is_string=True 541 ), 542 TokenType.NUMBER: lambda self, token: self.expression( 543 exp.Literal, this=token.text, is_string=False 544 ), 545 TokenType.STAR: lambda self, _: self.expression( 546 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 547 ), 548 TokenType.NULL: lambda self, _: self.expression(exp.Null), 549 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 550 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 551 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 552 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 553 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 554 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 555 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 556 exp.National, this=token.text 557 ), 558 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 559 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 560 } 561 562 PLACEHOLDER_PARSERS = { 563 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 564 TokenType.PARAMETER: lambda self: self._parse_parameter(), 565 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 566 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 567 else None, 568 } 569 570 RANGE_PARSERS = { 571 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 572 TokenType.GLOB: binary_range_parser(exp.Glob), 573 TokenType.ILIKE: binary_range_parser(exp.ILike), 574 TokenType.IN: lambda self, this: self._parse_in(this), 575 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 576 TokenType.IS: lambda self, this: self._parse_is(this), 577 TokenType.LIKE: binary_range_parser(exp.Like), 578 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 579 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 580 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 581 } 582 583 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 584 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 585 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 586 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 587 "CHARACTER SET": lambda self: self._parse_character_set(), 588 "CHECKSUM": lambda self: self._parse_checksum(), 589 "CLUSTER BY": lambda self: self._parse_cluster(), 590 "CLUSTERED": lambda self: self._parse_clustered_by(), 591 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 592 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 593 "COPY": lambda self: self._parse_copy_property(), 594 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 595 "DEFINER": lambda self: self._parse_definer(), 596 "DETERMINISTIC": lambda self: self.expression( 597 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 598 ), 599 "DISTKEY": lambda self: self._parse_distkey(), 600 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 601 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 602 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 603 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 604 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 605 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 606 "FREESPACE": lambda self: self._parse_freespace(), 607 "IMMUTABLE": lambda self: self.expression( 608 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 609 ), 610 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 611 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 612 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 613 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 614 "LIKE": lambda self: self._parse_create_like(), 615 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 616 "LOCK": lambda self: self._parse_locking(), 617 "LOCKING": lambda self: self._parse_locking(), 618 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 619 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 620 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 621 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 622 "NO": lambda self: self._parse_no_property(), 623 "ON": lambda self: self._parse_on_property(), 624 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 625 "PARTITION BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 627 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 628 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 629 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 630 "RETURNS": lambda self: self._parse_returns(), 631 "ROW": lambda self: self._parse_row(), 632 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 633 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 634 "SETTINGS": lambda self: self.expression( 635 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 636 ), 637 "SORTKEY": lambda self: self._parse_sortkey(), 638 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 639 "STABLE": lambda self: self.expression( 640 exp.StabilityProperty, this=exp.Literal.string("STABLE") 641 ), 642 "STORED": lambda self: self._parse_stored(), 643 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 644 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 645 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 646 "TO": lambda self: self._parse_to_table(), 647 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 648 "TTL": lambda self: self._parse_ttl(), 649 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 650 "VOLATILE": lambda self: self._parse_volatile_property(), 651 "WITH": lambda self: self._parse_with_property(), 652 } 653 654 CONSTRAINT_PARSERS = { 655 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 656 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 657 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 658 "CHARACTER SET": lambda self: self.expression( 659 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 660 ), 661 "CHECK": lambda self: self.expression( 662 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 663 ), 664 "COLLATE": lambda self: self.expression( 665 exp.CollateColumnConstraint, this=self._parse_var() 666 ), 667 "COMMENT": lambda self: self.expression( 668 exp.CommentColumnConstraint, this=self._parse_string() 669 ), 670 "COMPRESS": lambda self: self._parse_compress(), 671 "DEFAULT": lambda self: self.expression( 672 exp.DefaultColumnConstraint, this=self._parse_bitwise() 673 ), 674 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 675 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 676 "FORMAT": lambda self: self.expression( 677 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 678 ), 679 "GENERATED": lambda self: self._parse_generated_as_identity(), 680 "IDENTITY": lambda self: self._parse_auto_increment(), 681 "INLINE": lambda self: self._parse_inline(), 682 "LIKE": lambda self: self._parse_create_like(), 683 "NOT": lambda self: self._parse_not_constraint(), 684 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 685 "ON": lambda self: self._match(TokenType.UPDATE) 686 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 687 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 688 "PRIMARY KEY": lambda self: self._parse_primary_key(), 689 "REFERENCES": lambda self: self._parse_references(match=False), 690 "TITLE": lambda self: self.expression( 691 exp.TitleColumnConstraint, this=self._parse_var_or_string() 692 ), 693 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 694 "UNIQUE": lambda self: self._parse_unique(), 695 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 696 } 697 698 ALTER_PARSERS = { 699 "ADD": lambda self: self._parse_alter_table_add(), 700 "ALTER": lambda self: self._parse_alter_table_alter(), 701 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 702 "DROP": lambda self: self._parse_alter_table_drop(), 703 "RENAME": lambda self: self._parse_alter_table_rename(), 704 } 705 706 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 707 708 NO_PAREN_FUNCTION_PARSERS = { 709 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 710 TokenType.CASE: lambda self: self._parse_case(), 711 TokenType.IF: lambda self: self._parse_if(), 712 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 713 exp.NextValueFor, 714 this=self._parse_column(), 715 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 716 ), 717 } 718 719 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 720 721 FUNCTION_PARSERS = { 722 "ANY_VALUE": lambda self: self._parse_any_value(), 723 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 724 "CONCAT": lambda self: self._parse_concat(), 725 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 726 "DECODE": lambda self: self._parse_decode(), 727 "EXTRACT": lambda self: self._parse_extract(), 728 "JSON_OBJECT": lambda self: self._parse_json_object(), 729 "LOG": lambda self: self._parse_logarithm(), 730 "MATCH": lambda self: self._parse_match_against(), 731 "OPENJSON": lambda self: self._parse_open_json(), 732 "POSITION": lambda self: self._parse_position(), 733 "SAFE_CAST": lambda self: self._parse_cast(False), 734 "STRING_AGG": lambda self: self._parse_string_agg(), 735 "SUBSTRING": lambda self: self._parse_substring(), 736 "TRIM": lambda self: self._parse_trim(), 737 "TRY_CAST": lambda self: self._parse_cast(False), 738 "TRY_CONVERT": lambda self: self._parse_convert(False), 739 } 740 741 QUERY_MODIFIER_PARSERS = { 742 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 743 TokenType.WHERE: lambda self: ("where", self._parse_where()), 744 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 745 TokenType.HAVING: lambda self: ("having", self._parse_having()), 746 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 747 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 748 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 749 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 750 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 751 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 752 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 753 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 754 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 756 TokenType.CLUSTER_BY: lambda self: ( 757 "cluster", 758 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 759 ), 760 TokenType.DISTRIBUTE_BY: lambda self: ( 761 "distribute", 762 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 763 ), 764 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 765 } 766 767 SET_PARSERS = { 768 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 769 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 770 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 771 "TRANSACTION": lambda self: self._parse_set_transaction(), 772 } 773 774 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 775 776 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 777 778 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 779 780 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 781 782 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 783 784 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 785 TRANSACTION_CHARACTERISTICS = { 786 "ISOLATION LEVEL REPEATABLE READ", 787 "ISOLATION LEVEL READ COMMITTED", 788 "ISOLATION LEVEL READ UNCOMMITTED", 789 "ISOLATION LEVEL SERIALIZABLE", 790 "READ WRITE", 791 "READ ONLY", 792 } 793 794 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 795 796 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 797 798 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 799 800 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 801 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 802 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 803 804 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 805 806 STRICT_CAST = True 807 808 # A NULL arg in CONCAT yields NULL by default 809 CONCAT_NULL_OUTPUTS_STRING = False 810 811 PREFIXED_PIVOT_COLUMNS = False 812 IDENTIFY_PIVOT_STRINGS = False 813 814 LOG_BASE_FIRST = True 815 LOG_DEFAULTS_TO_LN = False 816 817 __slots__ = ( 818 "error_level", 819 "error_message_context", 820 "max_errors", 821 "sql", 822 "errors", 823 "_tokens", 824 "_index", 825 "_curr", 826 "_next", 827 "_prev", 828 "_prev_comments", 829 ) 830 831 # Autofilled 832 INDEX_OFFSET: int = 0 833 UNNEST_COLUMN_ONLY: bool = False 834 ALIAS_POST_TABLESAMPLE: bool = False 835 STRICT_STRING_CONCAT = False 836 NULL_ORDERING: str = "nulls_are_small" 837 SHOW_TRIE: t.Dict = {} 838 SET_TRIE: t.Dict = {} 839 FORMAT_MAPPING: t.Dict[str, str] = {} 840 FORMAT_TRIE: t.Dict = {} 841 TIME_MAPPING: t.Dict[str, str] = {} 842 TIME_TRIE: t.Dict = {} 843 844 def __init__( 845 self, 846 error_level: t.Optional[ErrorLevel] = None, 847 error_message_context: int = 100, 848 max_errors: int = 3, 849 ): 850 self.error_level = error_level or ErrorLevel.IMMEDIATE 851 self.error_message_context = error_message_context 852 self.max_errors = max_errors 853 self.reset() 854 855 def reset(self): 856 self.sql = "" 857 self.errors = [] 858 self._tokens = [] 859 self._index = 0 860 self._curr = None 861 self._next = None 862 self._prev = None 863 self._prev_comments = None 864 865 def parse( 866 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 867 ) -> t.List[t.Optional[exp.Expression]]: 868 """ 869 Parses a list of tokens and returns a list of syntax trees, one tree 870 per parsed SQL statement. 871 872 Args: 873 raw_tokens: The list of tokens. 874 sql: The original SQL string, used to produce helpful debug messages. 875 876 Returns: 877 The list of the produced syntax trees. 878 """ 879 return self._parse( 880 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 881 ) 882 883 def parse_into( 884 self, 885 expression_types: exp.IntoType, 886 raw_tokens: t.List[Token], 887 sql: t.Optional[str] = None, 888 ) -> t.List[t.Optional[exp.Expression]]: 889 """ 890 Parses a list of tokens into a given Expression type. If a collection of Expression 891 types is given instead, this method will try to parse the token list into each one 892 of them, stopping at the first for which the parsing succeeds. 893 894 Args: 895 expression_types: The expression type(s) to try and parse the token list into. 896 raw_tokens: The list of tokens. 897 sql: The original SQL string, used to produce helpful debug messages. 898 899 Returns: 900 The target Expression. 901 """ 902 errors = [] 903 for expression_type in ensure_list(expression_types): 904 parser = self.EXPRESSION_PARSERS.get(expression_type) 905 if not parser: 906 raise TypeError(f"No parser registered for {expression_type}") 907 908 try: 909 return self._parse(parser, raw_tokens, sql) 910 except ParseError as e: 911 e.errors[0]["into_expression"] = expression_type 912 errors.append(e) 913 914 raise ParseError( 915 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 916 errors=merge_errors(errors), 917 ) from errors[-1] 918 919 def _parse( 920 self, 921 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 922 raw_tokens: t.List[Token], 923 sql: t.Optional[str] = None, 924 ) -> t.List[t.Optional[exp.Expression]]: 925 self.reset() 926 self.sql = sql or "" 927 928 total = len(raw_tokens) 929 chunks: t.List[t.List[Token]] = [[]] 930 931 for i, token in enumerate(raw_tokens): 932 if token.token_type == TokenType.SEMICOLON: 933 if i < total - 1: 934 chunks.append([]) 935 else: 936 chunks[-1].append(token) 937 938 expressions = [] 939 940 for tokens in chunks: 941 self._index = -1 942 self._tokens = tokens 943 self._advance() 944 945 expressions.append(parse_method(self)) 946 947 if self._index < len(self._tokens): 948 self.raise_error("Invalid expression / Unexpected token") 949 950 self.check_errors() 951 952 return expressions 953 954 def check_errors(self) -> None: 955 """Logs or raises any found errors, depending on the chosen error level setting.""" 956 if self.error_level == ErrorLevel.WARN: 957 for error in self.errors: 958 logger.error(str(error)) 959 elif self.error_level == ErrorLevel.RAISE and self.errors: 960 raise ParseError( 961 concat_messages(self.errors, self.max_errors), 962 errors=merge_errors(self.errors), 963 ) 964 965 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 966 """ 967 Appends an error in the list of recorded errors or raises it, depending on the chosen 968 error level setting. 969 """ 970 token = token or self._curr or self._prev or Token.string("") 971 start = token.start 972 end = token.end + 1 973 start_context = self.sql[max(start - self.error_message_context, 0) : start] 974 highlight = self.sql[start:end] 975 end_context = self.sql[end : end + self.error_message_context] 976 977 error = ParseError.new( 978 f"{message}. Line {token.line}, Col: {token.col}.\n" 979 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 980 description=message, 981 line=token.line, 982 col=token.col, 983 start_context=start_context, 984 highlight=highlight, 985 end_context=end_context, 986 ) 987 988 if self.error_level == ErrorLevel.IMMEDIATE: 989 raise error 990 991 self.errors.append(error) 992 993 def expression( 994 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 995 ) -> E: 996 """ 997 Creates a new, validated Expression. 998 999 Args: 1000 exp_class: The expression class to instantiate. 1001 comments: An optional list of comments to attach to the expression. 1002 kwargs: The arguments to set for the expression along with their respective values. 1003 1004 Returns: 1005 The target expression. 1006 """ 1007 instance = exp_class(**kwargs) 1008 instance.add_comments(comments) if comments else self._add_comments(instance) 1009 return self.validate_expression(instance) 1010 1011 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1012 if expression and self._prev_comments: 1013 expression.add_comments(self._prev_comments) 1014 self._prev_comments = None 1015 1016 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1017 """ 1018 Validates an Expression, making sure that all its mandatory arguments are set. 1019 1020 Args: 1021 expression: The expression to validate. 1022 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1023 1024 Returns: 1025 The validated expression. 1026 """ 1027 if self.error_level != ErrorLevel.IGNORE: 1028 for error_message in expression.error_messages(args): 1029 self.raise_error(error_message) 1030 1031 return expression 1032 1033 def _find_sql(self, start: Token, end: Token) -> str: 1034 return self.sql[start.start : end.end + 1] 1035 1036 def _advance(self, times: int = 1) -> None: 1037 self._index += times 1038 self._curr = seq_get(self._tokens, self._index) 1039 self._next = seq_get(self._tokens, self._index + 1) 1040 1041 if self._index > 0: 1042 self._prev = self._tokens[self._index - 1] 1043 self._prev_comments = self._prev.comments 1044 else: 1045 self._prev = None 1046 self._prev_comments = None 1047 1048 def _retreat(self, index: int) -> None: 1049 if index != self._index: 1050 self._advance(index - self._index) 1051 1052 def _parse_command(self) -> exp.Command: 1053 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1054 1055 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1056 start = self._prev 1057 exists = self._parse_exists() if allow_exists else None 1058 1059 self._match(TokenType.ON) 1060 1061 kind = self._match_set(self.CREATABLES) and self._prev 1062 if not kind: 1063 return self._parse_as_command(start) 1064 1065 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1066 this = self._parse_user_defined_function(kind=kind.token_type) 1067 elif kind.token_type == TokenType.TABLE: 1068 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1069 elif kind.token_type == TokenType.COLUMN: 1070 this = self._parse_column() 1071 else: 1072 this = self._parse_id_var() 1073 1074 self._match(TokenType.IS) 1075 1076 return self.expression( 1077 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1078 ) 1079 1080 def _parse_to_table( 1081 self, 1082 ) -> exp.ToTableProperty: 1083 table = self._parse_table_parts(schema=True) 1084 return self.expression(exp.ToTableProperty, this=table) 1085 1086 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1087 def _parse_ttl(self) -> exp.Expression: 1088 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1089 this = self._parse_bitwise() 1090 1091 if self._match_text_seq("DELETE"): 1092 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1093 if self._match_text_seq("RECOMPRESS"): 1094 return self.expression( 1095 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1096 ) 1097 if self._match_text_seq("TO", "DISK"): 1098 return self.expression( 1099 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1100 ) 1101 if self._match_text_seq("TO", "VOLUME"): 1102 return self.expression( 1103 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1104 ) 1105 1106 return this 1107 1108 expressions = self._parse_csv(_parse_ttl_action) 1109 where = self._parse_where() 1110 group = self._parse_group() 1111 1112 aggregates = None 1113 if group and self._match(TokenType.SET): 1114 aggregates = self._parse_csv(self._parse_set_item) 1115 1116 return self.expression( 1117 exp.MergeTreeTTL, 1118 expressions=expressions, 1119 where=where, 1120 group=group, 1121 aggregates=aggregates, 1122 ) 1123 1124 def _parse_statement(self) -> t.Optional[exp.Expression]: 1125 if self._curr is None: 1126 return None 1127 1128 if self._match_set(self.STATEMENT_PARSERS): 1129 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1130 1131 if self._match_set(Tokenizer.COMMANDS): 1132 return self._parse_command() 1133 1134 expression = self._parse_expression() 1135 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1136 return self._parse_query_modifiers(expression) 1137 1138 def _parse_drop(self) -> exp.Drop | exp.Command: 1139 start = self._prev 1140 temporary = self._match(TokenType.TEMPORARY) 1141 materialized = self._match_text_seq("MATERIALIZED") 1142 1143 kind = self._match_set(self.CREATABLES) and self._prev.text 1144 if not kind: 1145 return self._parse_as_command(start) 1146 1147 return self.expression( 1148 exp.Drop, 1149 comments=start.comments, 1150 exists=self._parse_exists(), 1151 this=self._parse_table(schema=True), 1152 kind=kind, 1153 temporary=temporary, 1154 materialized=materialized, 1155 cascade=self._match_text_seq("CASCADE"), 1156 constraints=self._match_text_seq("CONSTRAINTS"), 1157 purge=self._match_text_seq("PURGE"), 1158 ) 1159 1160 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1161 return ( 1162 self._match(TokenType.IF) 1163 and (not not_ or self._match(TokenType.NOT)) 1164 and self._match(TokenType.EXISTS) 1165 ) 1166 1167 def _parse_create(self) -> exp.Create | exp.Command: 1168 # Note: this can't be None because we've matched a statement parser 1169 start = self._prev 1170 replace = start.text.upper() == "REPLACE" or self._match_pair( 1171 TokenType.OR, TokenType.REPLACE 1172 ) 1173 unique = self._match(TokenType.UNIQUE) 1174 1175 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1176 self._advance() 1177 1178 properties = None 1179 create_token = self._match_set(self.CREATABLES) and self._prev 1180 1181 if not create_token: 1182 # exp.Properties.Location.POST_CREATE 1183 properties = self._parse_properties() 1184 create_token = self._match_set(self.CREATABLES) and self._prev 1185 1186 if not properties or not create_token: 1187 return self._parse_as_command(start) 1188 1189 exists = self._parse_exists(not_=True) 1190 this = None 1191 expression = None 1192 indexes = None 1193 no_schema_binding = None 1194 begin = None 1195 clone = None 1196 1197 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1198 nonlocal properties 1199 if properties and temp_props: 1200 properties.expressions.extend(temp_props.expressions) 1201 elif temp_props: 1202 properties = temp_props 1203 1204 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1205 this = self._parse_user_defined_function(kind=create_token.token_type) 1206 1207 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1208 extend_props(self._parse_properties()) 1209 1210 self._match(TokenType.ALIAS) 1211 begin = self._match(TokenType.BEGIN) 1212 return_ = self._match_text_seq("RETURN") 1213 expression = self._parse_statement() 1214 1215 if return_: 1216 expression = self.expression(exp.Return, this=expression) 1217 elif create_token.token_type == TokenType.INDEX: 1218 this = self._parse_index(index=self._parse_id_var()) 1219 elif create_token.token_type in self.DB_CREATABLES: 1220 table_parts = self._parse_table_parts(schema=True) 1221 1222 # exp.Properties.Location.POST_NAME 1223 self._match(TokenType.COMMA) 1224 extend_props(self._parse_properties(before=True)) 1225 1226 this = self._parse_schema(this=table_parts) 1227 1228 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1229 extend_props(self._parse_properties()) 1230 1231 self._match(TokenType.ALIAS) 1232 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1233 # exp.Properties.Location.POST_ALIAS 1234 extend_props(self._parse_properties()) 1235 1236 expression = self._parse_ddl_select() 1237 1238 if create_token.token_type == TokenType.TABLE: 1239 # exp.Properties.Location.POST_EXPRESSION 1240 extend_props(self._parse_properties()) 1241 1242 indexes = [] 1243 while True: 1244 index = self._parse_index() 1245 1246 # exp.Properties.Location.POST_INDEX 1247 extend_props(self._parse_properties()) 1248 1249 if not index: 1250 break 1251 else: 1252 self._match(TokenType.COMMA) 1253 indexes.append(index) 1254 elif create_token.token_type == TokenType.VIEW: 1255 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1256 no_schema_binding = True 1257 1258 if self._match_text_seq("CLONE"): 1259 clone = self._parse_table(schema=True) 1260 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1261 clone_kind = ( 1262 self._match(TokenType.L_PAREN) 1263 and self._match_texts(self.CLONE_KINDS) 1264 and self._prev.text.upper() 1265 ) 1266 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1267 self._match(TokenType.R_PAREN) 1268 clone = self.expression( 1269 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1270 ) 1271 1272 return self.expression( 1273 exp.Create, 1274 this=this, 1275 kind=create_token.text, 1276 replace=replace, 1277 unique=unique, 1278 expression=expression, 1279 exists=exists, 1280 properties=properties, 1281 indexes=indexes, 1282 no_schema_binding=no_schema_binding, 1283 begin=begin, 1284 clone=clone, 1285 ) 1286 1287 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1288 # only used for teradata currently 1289 self._match(TokenType.COMMA) 1290 1291 kwargs = { 1292 "no": self._match_text_seq("NO"), 1293 "dual": self._match_text_seq("DUAL"), 1294 "before": self._match_text_seq("BEFORE"), 1295 "default": self._match_text_seq("DEFAULT"), 1296 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1297 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1298 "after": self._match_text_seq("AFTER"), 1299 "minimum": self._match_texts(("MIN", "MINIMUM")), 1300 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1301 } 1302 1303 if self._match_texts(self.PROPERTY_PARSERS): 1304 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1305 try: 1306 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1307 except TypeError: 1308 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1309 1310 return None 1311 1312 def _parse_property(self) -> t.Optional[exp.Expression]: 1313 if self._match_texts(self.PROPERTY_PARSERS): 1314 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1315 1316 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1317 return self._parse_character_set(default=True) 1318 1319 if self._match_text_seq("COMPOUND", "SORTKEY"): 1320 return self._parse_sortkey(compound=True) 1321 1322 if self._match_text_seq("SQL", "SECURITY"): 1323 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1324 1325 assignment = self._match_pair( 1326 TokenType.VAR, TokenType.EQ, advance=False 1327 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1328 1329 if assignment: 1330 key = self._parse_var_or_string() 1331 self._match(TokenType.EQ) 1332 return self.expression(exp.Property, this=key, value=self._parse_column()) 1333 1334 return None 1335 1336 def _parse_stored(self) -> exp.FileFormatProperty: 1337 self._match(TokenType.ALIAS) 1338 1339 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1340 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1341 1342 return self.expression( 1343 exp.FileFormatProperty, 1344 this=self.expression( 1345 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1346 ) 1347 if input_format or output_format 1348 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1349 ) 1350 1351 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1352 self._match(TokenType.EQ) 1353 self._match(TokenType.ALIAS) 1354 return self.expression(exp_class, this=self._parse_field()) 1355 1356 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1357 properties = [] 1358 while True: 1359 if before: 1360 prop = self._parse_property_before() 1361 else: 1362 prop = self._parse_property() 1363 1364 if not prop: 1365 break 1366 for p in ensure_list(prop): 1367 properties.append(p) 1368 1369 if properties: 1370 return self.expression(exp.Properties, expressions=properties) 1371 1372 return None 1373 1374 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1375 return self.expression( 1376 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1377 ) 1378 1379 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1380 if self._index >= 2: 1381 pre_volatile_token = self._tokens[self._index - 2] 1382 else: 1383 pre_volatile_token = None 1384 1385 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1386 return exp.VolatileProperty() 1387 1388 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1389 1390 def _parse_with_property( 1391 self, 1392 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1393 if self._match(TokenType.L_PAREN, advance=False): 1394 return self._parse_wrapped_csv(self._parse_property) 1395 1396 if self._match_text_seq("JOURNAL"): 1397 return self._parse_withjournaltable() 1398 1399 if self._match_text_seq("DATA"): 1400 return self._parse_withdata(no=False) 1401 elif self._match_text_seq("NO", "DATA"): 1402 return self._parse_withdata(no=True) 1403 1404 if not self._next: 1405 return None 1406 1407 return self._parse_withisolatedloading() 1408 1409 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1410 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1411 self._match(TokenType.EQ) 1412 1413 user = self._parse_id_var() 1414 self._match(TokenType.PARAMETER) 1415 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1416 1417 if not user or not host: 1418 return None 1419 1420 return exp.DefinerProperty(this=f"{user}@{host}") 1421 1422 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1423 self._match(TokenType.TABLE) 1424 self._match(TokenType.EQ) 1425 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1426 1427 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1428 return self.expression(exp.LogProperty, no=no) 1429 1430 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1431 return self.expression(exp.JournalProperty, **kwargs) 1432 1433 def _parse_checksum(self) -> exp.ChecksumProperty: 1434 self._match(TokenType.EQ) 1435 1436 on = None 1437 if self._match(TokenType.ON): 1438 on = True 1439 elif self._match_text_seq("OFF"): 1440 on = False 1441 1442 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1443 1444 def _parse_cluster(self) -> exp.Cluster: 1445 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1446 1447 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1448 self._match_text_seq("BY") 1449 1450 self._match_l_paren() 1451 expressions = self._parse_csv(self._parse_column) 1452 self._match_r_paren() 1453 1454 if self._match_text_seq("SORTED", "BY"): 1455 self._match_l_paren() 1456 sorted_by = self._parse_csv(self._parse_ordered) 1457 self._match_r_paren() 1458 else: 1459 sorted_by = None 1460 1461 self._match(TokenType.INTO) 1462 buckets = self._parse_number() 1463 self._match_text_seq("BUCKETS") 1464 1465 return self.expression( 1466 exp.ClusteredByProperty, 1467 expressions=expressions, 1468 sorted_by=sorted_by, 1469 buckets=buckets, 1470 ) 1471 1472 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1473 if not self._match_text_seq("GRANTS"): 1474 self._retreat(self._index - 1) 1475 return None 1476 1477 return self.expression(exp.CopyGrantsProperty) 1478 1479 def _parse_freespace(self) -> exp.FreespaceProperty: 1480 self._match(TokenType.EQ) 1481 return self.expression( 1482 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1483 ) 1484 1485 def _parse_mergeblockratio( 1486 self, no: bool = False, default: bool = False 1487 ) -> exp.MergeBlockRatioProperty: 1488 if self._match(TokenType.EQ): 1489 return self.expression( 1490 exp.MergeBlockRatioProperty, 1491 this=self._parse_number(), 1492 percent=self._match(TokenType.PERCENT), 1493 ) 1494 1495 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1496 1497 def _parse_datablocksize( 1498 self, 1499 default: t.Optional[bool] = None, 1500 minimum: t.Optional[bool] = None, 1501 maximum: t.Optional[bool] = None, 1502 ) -> exp.DataBlocksizeProperty: 1503 self._match(TokenType.EQ) 1504 size = self._parse_number() 1505 1506 units = None 1507 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1508 units = self._prev.text 1509 1510 return self.expression( 1511 exp.DataBlocksizeProperty, 1512 size=size, 1513 units=units, 1514 default=default, 1515 minimum=minimum, 1516 maximum=maximum, 1517 ) 1518 1519 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1520 self._match(TokenType.EQ) 1521 always = self._match_text_seq("ALWAYS") 1522 manual = self._match_text_seq("MANUAL") 1523 never = self._match_text_seq("NEVER") 1524 default = self._match_text_seq("DEFAULT") 1525 1526 autotemp = None 1527 if self._match_text_seq("AUTOTEMP"): 1528 autotemp = self._parse_schema() 1529 1530 return self.expression( 1531 exp.BlockCompressionProperty, 1532 always=always, 1533 manual=manual, 1534 never=never, 1535 default=default, 1536 autotemp=autotemp, 1537 ) 1538 1539 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1540 no = self._match_text_seq("NO") 1541 concurrent = self._match_text_seq("CONCURRENT") 1542 self._match_text_seq("ISOLATED", "LOADING") 1543 for_all = self._match_text_seq("FOR", "ALL") 1544 for_insert = self._match_text_seq("FOR", "INSERT") 1545 for_none = self._match_text_seq("FOR", "NONE") 1546 return self.expression( 1547 exp.IsolatedLoadingProperty, 1548 no=no, 1549 concurrent=concurrent, 1550 for_all=for_all, 1551 for_insert=for_insert, 1552 for_none=for_none, 1553 ) 1554 1555 def _parse_locking(self) -> exp.LockingProperty: 1556 if self._match(TokenType.TABLE): 1557 kind = "TABLE" 1558 elif self._match(TokenType.VIEW): 1559 kind = "VIEW" 1560 elif self._match(TokenType.ROW): 1561 kind = "ROW" 1562 elif self._match_text_seq("DATABASE"): 1563 kind = "DATABASE" 1564 else: 1565 kind = None 1566 1567 if kind in ("DATABASE", "TABLE", "VIEW"): 1568 this = self._parse_table_parts() 1569 else: 1570 this = None 1571 1572 if self._match(TokenType.FOR): 1573 for_or_in = "FOR" 1574 elif self._match(TokenType.IN): 1575 for_or_in = "IN" 1576 else: 1577 for_or_in = None 1578 1579 if self._match_text_seq("ACCESS"): 1580 lock_type = "ACCESS" 1581 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1582 lock_type = "EXCLUSIVE" 1583 elif self._match_text_seq("SHARE"): 1584 lock_type = "SHARE" 1585 elif self._match_text_seq("READ"): 1586 lock_type = "READ" 1587 elif self._match_text_seq("WRITE"): 1588 lock_type = "WRITE" 1589 elif self._match_text_seq("CHECKSUM"): 1590 lock_type = "CHECKSUM" 1591 else: 1592 lock_type = None 1593 1594 override = self._match_text_seq("OVERRIDE") 1595 1596 return self.expression( 1597 exp.LockingProperty, 1598 this=this, 1599 kind=kind, 1600 for_or_in=for_or_in, 1601 lock_type=lock_type, 1602 override=override, 1603 ) 1604 1605 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1606 if self._match(TokenType.PARTITION_BY): 1607 return self._parse_csv(self._parse_conjunction) 1608 return [] 1609 1610 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1611 self._match(TokenType.EQ) 1612 return self.expression( 1613 exp.PartitionedByProperty, 1614 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1615 ) 1616 1617 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1618 if self._match_text_seq("AND", "STATISTICS"): 1619 statistics = True 1620 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1621 statistics = False 1622 else: 1623 statistics = None 1624 1625 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1626 1627 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1628 if self._match_text_seq("PRIMARY", "INDEX"): 1629 return exp.NoPrimaryIndexProperty() 1630 return None 1631 1632 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1633 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1634 return exp.OnCommitProperty() 1635 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1636 return exp.OnCommitProperty(delete=True) 1637 return None 1638 1639 def _parse_distkey(self) -> exp.DistKeyProperty: 1640 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1641 1642 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1643 table = self._parse_table(schema=True) 1644 1645 options = [] 1646 while self._match_texts(("INCLUDING", "EXCLUDING")): 1647 this = self._prev.text.upper() 1648 1649 id_var = self._parse_id_var() 1650 if not id_var: 1651 return None 1652 1653 options.append( 1654 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1655 ) 1656 1657 return self.expression(exp.LikeProperty, this=table, expressions=options) 1658 1659 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1660 return self.expression( 1661 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1662 ) 1663 1664 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1665 self._match(TokenType.EQ) 1666 return self.expression( 1667 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1668 ) 1669 1670 def _parse_returns(self) -> exp.ReturnsProperty: 1671 value: t.Optional[exp.Expression] 1672 is_table = self._match(TokenType.TABLE) 1673 1674 if is_table: 1675 if self._match(TokenType.LT): 1676 value = self.expression( 1677 exp.Schema, 1678 this="TABLE", 1679 expressions=self._parse_csv(self._parse_struct_types), 1680 ) 1681 if not self._match(TokenType.GT): 1682 self.raise_error("Expecting >") 1683 else: 1684 value = self._parse_schema(exp.var("TABLE")) 1685 else: 1686 value = self._parse_types() 1687 1688 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1689 1690 def _parse_describe(self) -> exp.Describe: 1691 kind = self._match_set(self.CREATABLES) and self._prev.text 1692 this = self._parse_table() 1693 return self.expression(exp.Describe, this=this, kind=kind) 1694 1695 def _parse_insert(self) -> exp.Insert: 1696 overwrite = self._match(TokenType.OVERWRITE) 1697 ignore = self._match(TokenType.IGNORE) 1698 local = self._match_text_seq("LOCAL") 1699 alternative = None 1700 1701 if self._match_text_seq("DIRECTORY"): 1702 this: t.Optional[exp.Expression] = self.expression( 1703 exp.Directory, 1704 this=self._parse_var_or_string(), 1705 local=local, 1706 row_format=self._parse_row_format(match_row=True), 1707 ) 1708 else: 1709 if self._match(TokenType.OR): 1710 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1711 1712 self._match(TokenType.INTO) 1713 self._match(TokenType.TABLE) 1714 this = self._parse_table(schema=True) 1715 1716 returning = self._parse_returning() 1717 1718 return self.expression( 1719 exp.Insert, 1720 this=this, 1721 exists=self._parse_exists(), 1722 partition=self._parse_partition(), 1723 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1724 and self._parse_conjunction(), 1725 expression=self._parse_ddl_select(), 1726 conflict=self._parse_on_conflict(), 1727 returning=returning or self._parse_returning(), 1728 overwrite=overwrite, 1729 alternative=alternative, 1730 ignore=ignore, 1731 ) 1732 1733 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1734 conflict = self._match_text_seq("ON", "CONFLICT") 1735 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1736 1737 if not conflict and not duplicate: 1738 return None 1739 1740 nothing = None 1741 expressions = None 1742 key = None 1743 constraint = None 1744 1745 if conflict: 1746 if self._match_text_seq("ON", "CONSTRAINT"): 1747 constraint = self._parse_id_var() 1748 else: 1749 key = self._parse_csv(self._parse_value) 1750 1751 self._match_text_seq("DO") 1752 if self._match_text_seq("NOTHING"): 1753 nothing = True 1754 else: 1755 self._match(TokenType.UPDATE) 1756 self._match(TokenType.SET) 1757 expressions = self._parse_csv(self._parse_equality) 1758 1759 return self.expression( 1760 exp.OnConflict, 1761 duplicate=duplicate, 1762 expressions=expressions, 1763 nothing=nothing, 1764 key=key, 1765 constraint=constraint, 1766 ) 1767 1768 def _parse_returning(self) -> t.Optional[exp.Returning]: 1769 if not self._match(TokenType.RETURNING): 1770 return None 1771 return self.expression( 1772 exp.Returning, 1773 expressions=self._parse_csv(self._parse_expression), 1774 into=self._match(TokenType.INTO) and self._parse_table_part(), 1775 ) 1776 1777 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1778 if not self._match(TokenType.FORMAT): 1779 return None 1780 return self._parse_row_format() 1781 1782 def _parse_row_format( 1783 self, match_row: bool = False 1784 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1785 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1786 return None 1787 1788 if self._match_text_seq("SERDE"): 1789 this = self._parse_string() 1790 1791 serde_properties = None 1792 if self._match(TokenType.SERDE_PROPERTIES): 1793 serde_properties = self.expression( 1794 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1795 ) 1796 1797 return self.expression( 1798 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1799 ) 1800 1801 self._match_text_seq("DELIMITED") 1802 1803 kwargs = {} 1804 1805 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1806 kwargs["fields"] = self._parse_string() 1807 if self._match_text_seq("ESCAPED", "BY"): 1808 kwargs["escaped"] = self._parse_string() 1809 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1810 kwargs["collection_items"] = self._parse_string() 1811 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1812 kwargs["map_keys"] = self._parse_string() 1813 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1814 kwargs["lines"] = self._parse_string() 1815 if self._match_text_seq("NULL", "DEFINED", "AS"): 1816 kwargs["null"] = self._parse_string() 1817 1818 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1819 1820 def _parse_load(self) -> exp.LoadData | exp.Command: 1821 if self._match_text_seq("DATA"): 1822 local = self._match_text_seq("LOCAL") 1823 self._match_text_seq("INPATH") 1824 inpath = self._parse_string() 1825 overwrite = self._match(TokenType.OVERWRITE) 1826 self._match_pair(TokenType.INTO, TokenType.TABLE) 1827 1828 return self.expression( 1829 exp.LoadData, 1830 this=self._parse_table(schema=True), 1831 local=local, 1832 overwrite=overwrite, 1833 inpath=inpath, 1834 partition=self._parse_partition(), 1835 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1836 serde=self._match_text_seq("SERDE") and self._parse_string(), 1837 ) 1838 return self._parse_as_command(self._prev) 1839 1840 def _parse_delete(self) -> exp.Delete: 1841 # This handles MySQL's "Multiple-Table Syntax" 1842 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1843 tables = None 1844 if not self._match(TokenType.FROM, advance=False): 1845 tables = self._parse_csv(self._parse_table) or None 1846 1847 returning = self._parse_returning() 1848 1849 return self.expression( 1850 exp.Delete, 1851 tables=tables, 1852 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1853 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1854 where=self._parse_where(), 1855 returning=returning or self._parse_returning(), 1856 limit=self._parse_limit(), 1857 ) 1858 1859 def _parse_update(self) -> exp.Update: 1860 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1861 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1862 returning = self._parse_returning() 1863 return self.expression( 1864 exp.Update, 1865 **{ # type: ignore 1866 "this": this, 1867 "expressions": expressions, 1868 "from": self._parse_from(joins=True), 1869 "where": self._parse_where(), 1870 "returning": returning or self._parse_returning(), 1871 "limit": self._parse_limit(), 1872 }, 1873 ) 1874 1875 def _parse_uncache(self) -> exp.Uncache: 1876 if not self._match(TokenType.TABLE): 1877 self.raise_error("Expecting TABLE after UNCACHE") 1878 1879 return self.expression( 1880 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1881 ) 1882 1883 def _parse_cache(self) -> exp.Cache: 1884 lazy = self._match_text_seq("LAZY") 1885 self._match(TokenType.TABLE) 1886 table = self._parse_table(schema=True) 1887 1888 options = [] 1889 if self._match_text_seq("OPTIONS"): 1890 self._match_l_paren() 1891 k = self._parse_string() 1892 self._match(TokenType.EQ) 1893 v = self._parse_string() 1894 options = [k, v] 1895 self._match_r_paren() 1896 1897 self._match(TokenType.ALIAS) 1898 return self.expression( 1899 exp.Cache, 1900 this=table, 1901 lazy=lazy, 1902 options=options, 1903 expression=self._parse_select(nested=True), 1904 ) 1905 1906 def _parse_partition(self) -> t.Optional[exp.Partition]: 1907 if not self._match(TokenType.PARTITION): 1908 return None 1909 1910 return self.expression( 1911 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1912 ) 1913 1914 def _parse_value(self) -> exp.Tuple: 1915 if self._match(TokenType.L_PAREN): 1916 expressions = self._parse_csv(self._parse_conjunction) 1917 self._match_r_paren() 1918 return self.expression(exp.Tuple, expressions=expressions) 1919 1920 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1921 # https://prestodb.io/docs/current/sql/values.html 1922 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1923 1924 def _parse_select( 1925 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1926 ) -> t.Optional[exp.Expression]: 1927 cte = self._parse_with() 1928 if cte: 1929 this = self._parse_statement() 1930 1931 if not this: 1932 self.raise_error("Failed to parse any statement following CTE") 1933 return cte 1934 1935 if "with" in this.arg_types: 1936 this.set("with", cte) 1937 else: 1938 self.raise_error(f"{this.key} does not support CTE") 1939 this = cte 1940 elif self._match(TokenType.SELECT): 1941 comments = self._prev_comments 1942 1943 hint = self._parse_hint() 1944 all_ = self._match(TokenType.ALL) 1945 distinct = self._match(TokenType.DISTINCT) 1946 1947 kind = ( 1948 self._match(TokenType.ALIAS) 1949 and self._match_texts(("STRUCT", "VALUE")) 1950 and self._prev.text 1951 ) 1952 1953 if distinct: 1954 distinct = self.expression( 1955 exp.Distinct, 1956 on=self._parse_value() if self._match(TokenType.ON) else None, 1957 ) 1958 1959 if all_ and distinct: 1960 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1961 1962 limit = self._parse_limit(top=True) 1963 expressions = self._parse_expressions() 1964 1965 this = self.expression( 1966 exp.Select, 1967 kind=kind, 1968 hint=hint, 1969 distinct=distinct, 1970 expressions=expressions, 1971 limit=limit, 1972 ) 1973 this.comments = comments 1974 1975 into = self._parse_into() 1976 if into: 1977 this.set("into", into) 1978 1979 from_ = self._parse_from() 1980 if from_: 1981 this.set("from", from_) 1982 1983 this = self._parse_query_modifiers(this) 1984 elif (table or nested) and self._match(TokenType.L_PAREN): 1985 if self._match(TokenType.PIVOT): 1986 this = self._parse_simplified_pivot() 1987 elif self._match(TokenType.FROM): 1988 this = exp.select("*").from_( 1989 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1990 ) 1991 else: 1992 this = self._parse_table() if table else self._parse_select(nested=True) 1993 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1994 1995 self._match_r_paren() 1996 1997 # We return early here so that the UNION isn't attached to the subquery by the 1998 # following call to _parse_set_operations, but instead becomes the parent node 1999 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2000 elif self._match(TokenType.VALUES): 2001 this = self.expression( 2002 exp.Values, 2003 expressions=self._parse_csv(self._parse_value), 2004 alias=self._parse_table_alias(), 2005 ) 2006 else: 2007 this = None 2008 2009 return self._parse_set_operations(this) 2010 2011 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2012 if not skip_with_token and not self._match(TokenType.WITH): 2013 return None 2014 2015 comments = self._prev_comments 2016 recursive = self._match(TokenType.RECURSIVE) 2017 2018 expressions = [] 2019 while True: 2020 expressions.append(self._parse_cte()) 2021 2022 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2023 break 2024 else: 2025 self._match(TokenType.WITH) 2026 2027 return self.expression( 2028 exp.With, comments=comments, expressions=expressions, recursive=recursive 2029 ) 2030 2031 def _parse_cte(self) -> exp.CTE: 2032 alias = self._parse_table_alias() 2033 if not alias or not alias.this: 2034 self.raise_error("Expected CTE to have alias") 2035 2036 self._match(TokenType.ALIAS) 2037 return self.expression( 2038 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2039 ) 2040 2041 def _parse_table_alias( 2042 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2043 ) -> t.Optional[exp.TableAlias]: 2044 any_token = self._match(TokenType.ALIAS) 2045 alias = ( 2046 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2047 or self._parse_string_as_identifier() 2048 ) 2049 2050 index = self._index 2051 if self._match(TokenType.L_PAREN): 2052 columns = self._parse_csv(self._parse_function_parameter) 2053 self._match_r_paren() if columns else self._retreat(index) 2054 else: 2055 columns = None 2056 2057 if not alias and not columns: 2058 return None 2059 2060 return self.expression(exp.TableAlias, this=alias, columns=columns) 2061 2062 def _parse_subquery( 2063 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2064 ) -> t.Optional[exp.Subquery]: 2065 if not this: 2066 return None 2067 2068 return self.expression( 2069 exp.Subquery, 2070 this=this, 2071 pivots=self._parse_pivots(), 2072 alias=self._parse_table_alias() if parse_alias else None, 2073 ) 2074 2075 def _parse_query_modifiers( 2076 self, this: t.Optional[exp.Expression] 2077 ) -> t.Optional[exp.Expression]: 2078 if isinstance(this, self.MODIFIABLES): 2079 for join in iter(self._parse_join, None): 2080 this.append("joins", join) 2081 for lateral in iter(self._parse_lateral, None): 2082 this.append("laterals", lateral) 2083 2084 while True: 2085 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2086 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2087 key, expression = parser(self) 2088 2089 if expression: 2090 this.set(key, expression) 2091 if key == "limit": 2092 offset = expression.args.pop("offset", None) 2093 if offset: 2094 this.set("offset", exp.Offset(expression=offset)) 2095 continue 2096 break 2097 return this 2098 2099 def _parse_hint(self) -> t.Optional[exp.Hint]: 2100 if self._match(TokenType.HINT): 2101 hints = [] 2102 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2103 hints.extend(hint) 2104 2105 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2106 self.raise_error("Expected */ after HINT") 2107 2108 return self.expression(exp.Hint, expressions=hints) 2109 2110 return None 2111 2112 def _parse_into(self) -> t.Optional[exp.Into]: 2113 if not self._match(TokenType.INTO): 2114 return None 2115 2116 temp = self._match(TokenType.TEMPORARY) 2117 unlogged = self._match_text_seq("UNLOGGED") 2118 self._match(TokenType.TABLE) 2119 2120 return self.expression( 2121 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2122 ) 2123 2124 def _parse_from( 2125 self, joins: bool = False, skip_from_token: bool = False 2126 ) -> t.Optional[exp.From]: 2127 if not skip_from_token and not self._match(TokenType.FROM): 2128 return None 2129 2130 return self.expression( 2131 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2132 ) 2133 2134 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2135 if not self._match(TokenType.MATCH_RECOGNIZE): 2136 return None 2137 2138 self._match_l_paren() 2139 2140 partition = self._parse_partition_by() 2141 order = self._parse_order() 2142 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2143 2144 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2145 rows = exp.var("ONE ROW PER MATCH") 2146 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2147 text = "ALL ROWS PER MATCH" 2148 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2149 text += f" SHOW EMPTY MATCHES" 2150 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2151 text += f" OMIT EMPTY MATCHES" 2152 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2153 text += f" WITH UNMATCHED ROWS" 2154 rows = exp.var(text) 2155 else: 2156 rows = None 2157 2158 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2159 text = "AFTER MATCH SKIP" 2160 if self._match_text_seq("PAST", "LAST", "ROW"): 2161 text += f" PAST LAST ROW" 2162 elif self._match_text_seq("TO", "NEXT", "ROW"): 2163 text += f" TO NEXT ROW" 2164 elif self._match_text_seq("TO", "FIRST"): 2165 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2166 elif self._match_text_seq("TO", "LAST"): 2167 text += f" TO LAST {self._advance_any().text}" # type: ignore 2168 after = exp.var(text) 2169 else: 2170 after = None 2171 2172 if self._match_text_seq("PATTERN"): 2173 self._match_l_paren() 2174 2175 if not self._curr: 2176 self.raise_error("Expecting )", self._curr) 2177 2178 paren = 1 2179 start = self._curr 2180 2181 while self._curr and paren > 0: 2182 if self._curr.token_type == TokenType.L_PAREN: 2183 paren += 1 2184 if self._curr.token_type == TokenType.R_PAREN: 2185 paren -= 1 2186 2187 end = self._prev 2188 self._advance() 2189 2190 if paren > 0: 2191 self.raise_error("Expecting )", self._curr) 2192 2193 pattern = exp.var(self._find_sql(start, end)) 2194 else: 2195 pattern = None 2196 2197 define = ( 2198 self._parse_csv( 2199 lambda: self.expression( 2200 exp.Alias, 2201 alias=self._parse_id_var(any_token=True), 2202 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2203 ) 2204 ) 2205 if self._match_text_seq("DEFINE") 2206 else None 2207 ) 2208 2209 self._match_r_paren() 2210 2211 return self.expression( 2212 exp.MatchRecognize, 2213 partition_by=partition, 2214 order=order, 2215 measures=measures, 2216 rows=rows, 2217 after=after, 2218 pattern=pattern, 2219 define=define, 2220 alias=self._parse_table_alias(), 2221 ) 2222 2223 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2224 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2225 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2226 2227 if outer_apply or cross_apply: 2228 this = self._parse_select(table=True) 2229 view = None 2230 outer = not cross_apply 2231 elif self._match(TokenType.LATERAL): 2232 this = self._parse_select(table=True) 2233 view = self._match(TokenType.VIEW) 2234 outer = self._match(TokenType.OUTER) 2235 else: 2236 return None 2237 2238 if not this: 2239 this = self._parse_function() or self._parse_id_var(any_token=False) 2240 while self._match(TokenType.DOT): 2241 this = exp.Dot( 2242 this=this, 2243 expression=self._parse_function() or self._parse_id_var(any_token=False), 2244 ) 2245 2246 if view: 2247 table = self._parse_id_var(any_token=False) 2248 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2249 table_alias: t.Optional[exp.TableAlias] = self.expression( 2250 exp.TableAlias, this=table, columns=columns 2251 ) 2252 elif isinstance(this, exp.Subquery) and this.alias: 2253 # Ensures parity between the Subquery's and the Lateral's "alias" args 2254 table_alias = this.args["alias"].copy() 2255 else: 2256 table_alias = self._parse_table_alias() 2257 2258 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2259 2260 def _parse_join_parts( 2261 self, 2262 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2263 return ( 2264 self._match_set(self.JOIN_METHODS) and self._prev, 2265 self._match_set(self.JOIN_SIDES) and self._prev, 2266 self._match_set(self.JOIN_KINDS) and self._prev, 2267 ) 2268 2269 def _parse_join( 2270 self, skip_join_token: bool = False, parse_bracket: bool = False 2271 ) -> t.Optional[exp.Join]: 2272 if self._match(TokenType.COMMA): 2273 return self.expression(exp.Join, this=self._parse_table()) 2274 2275 index = self._index 2276 method, side, kind = self._parse_join_parts() 2277 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2278 join = self._match(TokenType.JOIN) 2279 2280 if not skip_join_token and not join: 2281 self._retreat(index) 2282 kind = None 2283 method = None 2284 side = None 2285 2286 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2287 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2288 2289 if not skip_join_token and not join and not outer_apply and not cross_apply: 2290 return None 2291 2292 if outer_apply: 2293 side = Token(TokenType.LEFT, "LEFT") 2294 2295 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2296 2297 if method: 2298 kwargs["method"] = method.text 2299 if side: 2300 kwargs["side"] = side.text 2301 if kind: 2302 kwargs["kind"] = kind.text 2303 if hint: 2304 kwargs["hint"] = hint 2305 2306 if self._match(TokenType.ON): 2307 kwargs["on"] = self._parse_conjunction() 2308 elif self._match(TokenType.USING): 2309 kwargs["using"] = self._parse_wrapped_id_vars() 2310 elif not (kind and kind.token_type == TokenType.CROSS): 2311 index = self._index 2312 joins = self._parse_joins() 2313 2314 if joins and self._match(TokenType.ON): 2315 kwargs["on"] = self._parse_conjunction() 2316 elif joins and self._match(TokenType.USING): 2317 kwargs["using"] = self._parse_wrapped_id_vars() 2318 else: 2319 joins = None 2320 self._retreat(index) 2321 2322 kwargs["this"].set("joins", joins) 2323 2324 return self.expression(exp.Join, **kwargs) 2325 2326 def _parse_index( 2327 self, 2328 index: t.Optional[exp.Expression] = None, 2329 ) -> t.Optional[exp.Index]: 2330 if index: 2331 unique = None 2332 primary = None 2333 amp = None 2334 2335 self._match(TokenType.ON) 2336 self._match(TokenType.TABLE) # hive 2337 table = self._parse_table_parts(schema=True) 2338 else: 2339 unique = self._match(TokenType.UNIQUE) 2340 primary = self._match_text_seq("PRIMARY") 2341 amp = self._match_text_seq("AMP") 2342 2343 if not self._match(TokenType.INDEX): 2344 return None 2345 2346 index = self._parse_id_var() 2347 table = None 2348 2349 using = self._parse_field() if self._match(TokenType.USING) else None 2350 2351 if self._match(TokenType.L_PAREN, advance=False): 2352 columns = self._parse_wrapped_csv(self._parse_ordered) 2353 else: 2354 columns = None 2355 2356 return self.expression( 2357 exp.Index, 2358 this=index, 2359 table=table, 2360 using=using, 2361 columns=columns, 2362 unique=unique, 2363 primary=primary, 2364 amp=amp, 2365 partition_by=self._parse_partition_by(), 2366 ) 2367 2368 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2369 hints: t.List[exp.Expression] = [] 2370 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2371 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2372 hints.append( 2373 self.expression( 2374 exp.WithTableHint, 2375 expressions=self._parse_csv( 2376 lambda: self._parse_function() or self._parse_var(any_token=True) 2377 ), 2378 ) 2379 ) 2380 self._match_r_paren() 2381 else: 2382 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2383 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2384 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2385 2386 self._match_texts({"INDEX", "KEY"}) 2387 if self._match(TokenType.FOR): 2388 hint.set("target", self._advance_any() and self._prev.text.upper()) 2389 2390 hint.set("expressions", self._parse_wrapped_id_vars()) 2391 hints.append(hint) 2392 2393 return hints or None 2394 2395 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2396 return ( 2397 (not schema and self._parse_function(optional_parens=False)) 2398 or self._parse_id_var(any_token=False) 2399 or self._parse_string_as_identifier() 2400 or self._parse_placeholder() 2401 ) 2402 2403 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2404 catalog = None 2405 db = None 2406 table = self._parse_table_part(schema=schema) 2407 2408 while self._match(TokenType.DOT): 2409 if catalog: 2410 # This allows nesting the table in arbitrarily many dot expressions if needed 2411 table = self.expression( 2412 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2413 ) 2414 else: 2415 catalog = db 2416 db = table 2417 table = self._parse_table_part(schema=schema) 2418 2419 if not table: 2420 self.raise_error(f"Expected table name but got {self._curr}") 2421 2422 return self.expression( 2423 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2424 ) 2425 2426 def _parse_table( 2427 self, 2428 schema: bool = False, 2429 joins: bool = False, 2430 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2431 parse_bracket: bool = False, 2432 ) -> t.Optional[exp.Expression]: 2433 lateral = self._parse_lateral() 2434 if lateral: 2435 return lateral 2436 2437 unnest = self._parse_unnest() 2438 if unnest: 2439 return unnest 2440 2441 values = self._parse_derived_table_values() 2442 if values: 2443 return values 2444 2445 subquery = self._parse_select(table=True) 2446 if subquery: 2447 if not subquery.args.get("pivots"): 2448 subquery.set("pivots", self._parse_pivots()) 2449 return subquery 2450 2451 bracket = parse_bracket and self._parse_bracket(None) 2452 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2453 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2454 2455 if schema: 2456 return self._parse_schema(this=this) 2457 2458 if self.ALIAS_POST_TABLESAMPLE: 2459 table_sample = self._parse_table_sample() 2460 2461 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2462 if alias: 2463 this.set("alias", alias) 2464 2465 if not this.args.get("pivots"): 2466 this.set("pivots", self._parse_pivots()) 2467 2468 this.set("hints", self._parse_table_hints()) 2469 2470 if not self.ALIAS_POST_TABLESAMPLE: 2471 table_sample = self._parse_table_sample() 2472 2473 if table_sample: 2474 table_sample.set("this", this) 2475 this = table_sample 2476 2477 if joins: 2478 for join in iter(self._parse_join, None): 2479 this.append("joins", join) 2480 2481 return this 2482 2483 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2484 if not self._match(TokenType.UNNEST): 2485 return None 2486 2487 expressions = self._parse_wrapped_csv(self._parse_type) 2488 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2489 2490 alias = self._parse_table_alias() if with_alias else None 2491 2492 if alias and self.UNNEST_COLUMN_ONLY: 2493 if alias.args.get("columns"): 2494 self.raise_error("Unexpected extra column alias in unnest.") 2495 2496 alias.set("columns", [alias.this]) 2497 alias.set("this", None) 2498 2499 offset = None 2500 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2501 self._match(TokenType.ALIAS) 2502 offset = self._parse_id_var() or exp.to_identifier("offset") 2503 2504 return self.expression( 2505 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2506 ) 2507 2508 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2509 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2510 if not is_derived and not self._match(TokenType.VALUES): 2511 return None 2512 2513 expressions = self._parse_csv(self._parse_value) 2514 alias = self._parse_table_alias() 2515 2516 if is_derived: 2517 self._match_r_paren() 2518 2519 return self.expression( 2520 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2521 ) 2522 2523 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2524 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2525 as_modifier and self._match_text_seq("USING", "SAMPLE") 2526 ): 2527 return None 2528 2529 bucket_numerator = None 2530 bucket_denominator = None 2531 bucket_field = None 2532 percent = None 2533 rows = None 2534 size = None 2535 seed = None 2536 2537 kind = ( 2538 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2539 ) 2540 method = self._parse_var(tokens=(TokenType.ROW,)) 2541 2542 self._match(TokenType.L_PAREN) 2543 2544 num = self._parse_number() 2545 2546 if self._match_text_seq("BUCKET"): 2547 bucket_numerator = self._parse_number() 2548 self._match_text_seq("OUT", "OF") 2549 bucket_denominator = bucket_denominator = self._parse_number() 2550 self._match(TokenType.ON) 2551 bucket_field = self._parse_field() 2552 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2553 percent = num 2554 elif self._match(TokenType.ROWS): 2555 rows = num 2556 else: 2557 size = num 2558 2559 self._match(TokenType.R_PAREN) 2560 2561 if self._match(TokenType.L_PAREN): 2562 method = self._parse_var() 2563 seed = self._match(TokenType.COMMA) and self._parse_number() 2564 self._match_r_paren() 2565 elif self._match_texts(("SEED", "REPEATABLE")): 2566 seed = self._parse_wrapped(self._parse_number) 2567 2568 return self.expression( 2569 exp.TableSample, 2570 method=method, 2571 bucket_numerator=bucket_numerator, 2572 bucket_denominator=bucket_denominator, 2573 bucket_field=bucket_field, 2574 percent=percent, 2575 rows=rows, 2576 size=size, 2577 seed=seed, 2578 kind=kind, 2579 ) 2580 2581 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2582 return list(iter(self._parse_pivot, None)) or None 2583 2584 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2585 return list(iter(self._parse_join, None)) or None 2586 2587 # https://duckdb.org/docs/sql/statements/pivot 2588 def _parse_simplified_pivot(self) -> exp.Pivot: 2589 def _parse_on() -> t.Optional[exp.Expression]: 2590 this = self._parse_bitwise() 2591 return self._parse_in(this) if self._match(TokenType.IN) else this 2592 2593 this = self._parse_table() 2594 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2595 using = self._match(TokenType.USING) and self._parse_csv( 2596 lambda: self._parse_alias(self._parse_function()) 2597 ) 2598 group = self._parse_group() 2599 return self.expression( 2600 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2601 ) 2602 2603 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2604 index = self._index 2605 2606 if self._match(TokenType.PIVOT): 2607 unpivot = False 2608 elif self._match(TokenType.UNPIVOT): 2609 unpivot = True 2610 else: 2611 return None 2612 2613 expressions = [] 2614 field = None 2615 2616 if not self._match(TokenType.L_PAREN): 2617 self._retreat(index) 2618 return None 2619 2620 if unpivot: 2621 expressions = self._parse_csv(self._parse_column) 2622 else: 2623 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2624 2625 if not expressions: 2626 self.raise_error("Failed to parse PIVOT's aggregation list") 2627 2628 if not self._match(TokenType.FOR): 2629 self.raise_error("Expecting FOR") 2630 2631 value = self._parse_column() 2632 2633 if not self._match(TokenType.IN): 2634 self.raise_error("Expecting IN") 2635 2636 field = self._parse_in(value, alias=True) 2637 2638 self._match_r_paren() 2639 2640 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2641 2642 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2643 pivot.set("alias", self._parse_table_alias()) 2644 2645 if not unpivot: 2646 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2647 2648 columns: t.List[exp.Expression] = [] 2649 for fld in pivot.args["field"].expressions: 2650 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2651 for name in names: 2652 if self.PREFIXED_PIVOT_COLUMNS: 2653 name = f"{name}_{field_name}" if name else field_name 2654 else: 2655 name = f"{field_name}_{name}" if name else field_name 2656 2657 columns.append(exp.to_identifier(name)) 2658 2659 pivot.set("columns", columns) 2660 2661 return pivot 2662 2663 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2664 return [agg.alias for agg in aggregations] 2665 2666 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2667 if not skip_where_token and not self._match(TokenType.WHERE): 2668 return None 2669 2670 return self.expression( 2671 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2672 ) 2673 2674 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2675 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2676 return None 2677 2678 elements = defaultdict(list) 2679 2680 if self._match(TokenType.ALL): 2681 return self.expression(exp.Group, all=True) 2682 2683 while True: 2684 expressions = self._parse_csv(self._parse_conjunction) 2685 if expressions: 2686 elements["expressions"].extend(expressions) 2687 2688 grouping_sets = self._parse_grouping_sets() 2689 if grouping_sets: 2690 elements["grouping_sets"].extend(grouping_sets) 2691 2692 rollup = None 2693 cube = None 2694 totals = None 2695 2696 with_ = self._match(TokenType.WITH) 2697 if self._match(TokenType.ROLLUP): 2698 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2699 elements["rollup"].extend(ensure_list(rollup)) 2700 2701 if self._match(TokenType.CUBE): 2702 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2703 elements["cube"].extend(ensure_list(cube)) 2704 2705 if self._match_text_seq("TOTALS"): 2706 totals = True 2707 elements["totals"] = True # type: ignore 2708 2709 if not (grouping_sets or rollup or cube or totals): 2710 break 2711 2712 return self.expression(exp.Group, **elements) # type: ignore 2713 2714 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2715 if not self._match(TokenType.GROUPING_SETS): 2716 return None 2717 2718 return self._parse_wrapped_csv(self._parse_grouping_set) 2719 2720 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2721 if self._match(TokenType.L_PAREN): 2722 grouping_set = self._parse_csv(self._parse_column) 2723 self._match_r_paren() 2724 return self.expression(exp.Tuple, expressions=grouping_set) 2725 2726 return self._parse_column() 2727 2728 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2729 if not skip_having_token and not self._match(TokenType.HAVING): 2730 return None 2731 return self.expression(exp.Having, this=self._parse_conjunction()) 2732 2733 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2734 if not self._match(TokenType.QUALIFY): 2735 return None 2736 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2737 2738 def _parse_order( 2739 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2740 ) -> t.Optional[exp.Expression]: 2741 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2742 return this 2743 2744 return self.expression( 2745 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2746 ) 2747 2748 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2749 if not self._match(token): 2750 return None 2751 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2752 2753 def _parse_ordered(self) -> exp.Ordered: 2754 this = self._parse_conjunction() 2755 self._match(TokenType.ASC) 2756 2757 is_desc = self._match(TokenType.DESC) 2758 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2759 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2760 desc = is_desc or False 2761 asc = not desc 2762 nulls_first = is_nulls_first or False 2763 explicitly_null_ordered = is_nulls_first or is_nulls_last 2764 2765 if ( 2766 not explicitly_null_ordered 2767 and ( 2768 (asc and self.NULL_ORDERING == "nulls_are_small") 2769 or (desc and self.NULL_ORDERING != "nulls_are_small") 2770 ) 2771 and self.NULL_ORDERING != "nulls_are_last" 2772 ): 2773 nulls_first = True 2774 2775 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2776 2777 def _parse_limit( 2778 self, this: t.Optional[exp.Expression] = None, top: bool = False 2779 ) -> t.Optional[exp.Expression]: 2780 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2781 comments = self._prev_comments 2782 limit_paren = self._match(TokenType.L_PAREN) 2783 expression = self._parse_number() if top else self._parse_term() 2784 2785 if self._match(TokenType.COMMA): 2786 offset = expression 2787 expression = self._parse_term() 2788 else: 2789 offset = None 2790 2791 limit_exp = self.expression( 2792 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2793 ) 2794 2795 if limit_paren: 2796 self._match_r_paren() 2797 2798 return limit_exp 2799 2800 if self._match(TokenType.FETCH): 2801 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2802 direction = self._prev.text if direction else "FIRST" 2803 2804 count = self._parse_number() 2805 percent = self._match(TokenType.PERCENT) 2806 2807 self._match_set((TokenType.ROW, TokenType.ROWS)) 2808 2809 only = self._match_text_seq("ONLY") 2810 with_ties = self._match_text_seq("WITH", "TIES") 2811 2812 if only and with_ties: 2813 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2814 2815 return self.expression( 2816 exp.Fetch, 2817 direction=direction, 2818 count=count, 2819 percent=percent, 2820 with_ties=with_ties, 2821 ) 2822 2823 return this 2824 2825 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2826 if not self._match(TokenType.OFFSET): 2827 return this 2828 2829 count = self._parse_number() 2830 self._match_set((TokenType.ROW, TokenType.ROWS)) 2831 return self.expression(exp.Offset, this=this, expression=count) 2832 2833 def _parse_locks(self) -> t.List[exp.Lock]: 2834 locks = [] 2835 while True: 2836 if self._match_text_seq("FOR", "UPDATE"): 2837 update = True 2838 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2839 "LOCK", "IN", "SHARE", "MODE" 2840 ): 2841 update = False 2842 else: 2843 break 2844 2845 expressions = None 2846 if self._match_text_seq("OF"): 2847 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2848 2849 wait: t.Optional[bool | exp.Expression] = None 2850 if self._match_text_seq("NOWAIT"): 2851 wait = True 2852 elif self._match_text_seq("WAIT"): 2853 wait = self._parse_primary() 2854 elif self._match_text_seq("SKIP", "LOCKED"): 2855 wait = False 2856 2857 locks.append( 2858 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2859 ) 2860 2861 return locks 2862 2863 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2864 if not self._match_set(self.SET_OPERATIONS): 2865 return this 2866 2867 token_type = self._prev.token_type 2868 2869 if token_type == TokenType.UNION: 2870 expression = exp.Union 2871 elif token_type == TokenType.EXCEPT: 2872 expression = exp.Except 2873 else: 2874 expression = exp.Intersect 2875 2876 return self.expression( 2877 expression, 2878 this=this, 2879 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2880 expression=self._parse_set_operations(self._parse_select(nested=True)), 2881 ) 2882 2883 def _parse_expression(self) -> t.Optional[exp.Expression]: 2884 return self._parse_alias(self._parse_conjunction()) 2885 2886 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2887 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2888 2889 def _parse_equality(self) -> t.Optional[exp.Expression]: 2890 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2891 2892 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2893 return self._parse_tokens(self._parse_range, self.COMPARISON) 2894 2895 def _parse_range(self) -> t.Optional[exp.Expression]: 2896 this = self._parse_bitwise() 2897 negate = self._match(TokenType.NOT) 2898 2899 if self._match_set(self.RANGE_PARSERS): 2900 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2901 if not expression: 2902 return this 2903 2904 this = expression 2905 elif self._match(TokenType.ISNULL): 2906 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2907 2908 # Postgres supports ISNULL and NOTNULL for conditions. 2909 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2910 if self._match(TokenType.NOTNULL): 2911 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2912 this = self.expression(exp.Not, this=this) 2913 2914 if negate: 2915 this = self.expression(exp.Not, this=this) 2916 2917 if self._match(TokenType.IS): 2918 this = self._parse_is(this) 2919 2920 return this 2921 2922 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2923 index = self._index - 1 2924 negate = self._match(TokenType.NOT) 2925 2926 if self._match_text_seq("DISTINCT", "FROM"): 2927 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2928 return self.expression(klass, this=this, expression=self._parse_expression()) 2929 2930 expression = self._parse_null() or self._parse_boolean() 2931 if not expression: 2932 self._retreat(index) 2933 return None 2934 2935 this = self.expression(exp.Is, this=this, expression=expression) 2936 return self.expression(exp.Not, this=this) if negate else this 2937 2938 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2939 unnest = self._parse_unnest(with_alias=False) 2940 if unnest: 2941 this = self.expression(exp.In, this=this, unnest=unnest) 2942 elif self._match(TokenType.L_PAREN): 2943 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2944 2945 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2946 this = self.expression(exp.In, this=this, query=expressions[0]) 2947 else: 2948 this = self.expression(exp.In, this=this, expressions=expressions) 2949 2950 self._match_r_paren(this) 2951 else: 2952 this = self.expression(exp.In, this=this, field=self._parse_field()) 2953 2954 return this 2955 2956 def _parse_between(self, this: exp.Expression) -> exp.Between: 2957 low = self._parse_bitwise() 2958 self._match(TokenType.AND) 2959 high = self._parse_bitwise() 2960 return self.expression(exp.Between, this=this, low=low, high=high) 2961 2962 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2963 if not self._match(TokenType.ESCAPE): 2964 return this 2965 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2966 2967 def _parse_interval(self) -> t.Optional[exp.Interval]: 2968 if not self._match(TokenType.INTERVAL): 2969 return None 2970 2971 if self._match(TokenType.STRING, advance=False): 2972 this = self._parse_primary() 2973 else: 2974 this = self._parse_term() 2975 2976 unit = self._parse_function() or self._parse_var() 2977 2978 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2979 # each INTERVAL expression into this canonical form so it's easy to transpile 2980 if this and this.is_number: 2981 this = exp.Literal.string(this.name) 2982 elif this and this.is_string: 2983 parts = this.name.split() 2984 2985 if len(parts) == 2: 2986 if unit: 2987 # this is not actually a unit, it's something else 2988 unit = None 2989 self._retreat(self._index - 1) 2990 else: 2991 this = exp.Literal.string(parts[0]) 2992 unit = self.expression(exp.Var, this=parts[1]) 2993 2994 return self.expression(exp.Interval, this=this, unit=unit) 2995 2996 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2997 this = self._parse_term() 2998 2999 while True: 3000 if self._match_set(self.BITWISE): 3001 this = self.expression( 3002 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 3003 ) 3004 elif self._match_pair(TokenType.LT, TokenType.LT): 3005 this = self.expression( 3006 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3007 ) 3008 elif self._match_pair(TokenType.GT, TokenType.GT): 3009 this = self.expression( 3010 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3011 ) 3012 else: 3013 break 3014 3015 return this 3016 3017 def _parse_term(self) -> t.Optional[exp.Expression]: 3018 return self._parse_tokens(self._parse_factor, self.TERM) 3019 3020 def _parse_factor(self) -> t.Optional[exp.Expression]: 3021 return self._parse_tokens(self._parse_unary, self.FACTOR) 3022 3023 def _parse_unary(self) -> t.Optional[exp.Expression]: 3024 if self._match_set(self.UNARY_PARSERS): 3025 return self.UNARY_PARSERS[self._prev.token_type](self) 3026 return self._parse_at_time_zone(self._parse_type()) 3027 3028 def _parse_type(self) -> t.Optional[exp.Expression]: 3029 interval = self._parse_interval() 3030 if interval: 3031 return interval 3032 3033 index = self._index 3034 data_type = self._parse_types(check_func=True) 3035 this = self._parse_column() 3036 3037 if data_type: 3038 if isinstance(this, exp.Literal): 3039 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3040 if parser: 3041 return parser(self, this, data_type) 3042 return self.expression(exp.Cast, this=this, to=data_type) 3043 if not data_type.expressions: 3044 self._retreat(index) 3045 return self._parse_column() 3046 return self._parse_column_ops(data_type) 3047 3048 return this 3049 3050 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3051 this = self._parse_type() 3052 if not this: 3053 return None 3054 3055 return self.expression( 3056 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3057 ) 3058 3059 def _parse_types( 3060 self, check_func: bool = False, schema: bool = False 3061 ) -> t.Optional[exp.Expression]: 3062 index = self._index 3063 3064 prefix = self._match_text_seq("SYSUDTLIB", ".") 3065 3066 if not self._match_set(self.TYPE_TOKENS): 3067 return None 3068 3069 type_token = self._prev.token_type 3070 3071 if type_token == TokenType.PSEUDO_TYPE: 3072 return self.expression(exp.PseudoType, this=self._prev.text) 3073 3074 nested = type_token in self.NESTED_TYPE_TOKENS 3075 is_struct = type_token == TokenType.STRUCT 3076 expressions = None 3077 maybe_func = False 3078 3079 if self._match(TokenType.L_PAREN): 3080 if is_struct: 3081 expressions = self._parse_csv(self._parse_struct_types) 3082 elif nested: 3083 expressions = self._parse_csv( 3084 lambda: self._parse_types(check_func=check_func, schema=schema) 3085 ) 3086 elif type_token in self.ENUM_TYPE_TOKENS: 3087 expressions = self._parse_csv(self._parse_primary) 3088 else: 3089 expressions = self._parse_csv(self._parse_type_size) 3090 3091 if not expressions or not self._match(TokenType.R_PAREN): 3092 self._retreat(index) 3093 return None 3094 3095 maybe_func = True 3096 3097 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3098 this = exp.DataType( 3099 this=exp.DataType.Type.ARRAY, 3100 expressions=[ 3101 exp.DataType( 3102 this=exp.DataType.Type[type_token.value], 3103 expressions=expressions, 3104 nested=nested, 3105 ) 3106 ], 3107 nested=True, 3108 ) 3109 3110 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3111 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3112 3113 return this 3114 3115 if self._match(TokenType.L_BRACKET): 3116 self._retreat(index) 3117 return None 3118 3119 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3120 if nested and self._match(TokenType.LT): 3121 if is_struct: 3122 expressions = self._parse_csv(self._parse_struct_types) 3123 else: 3124 expressions = self._parse_csv( 3125 lambda: self._parse_types(check_func=check_func, schema=schema) 3126 ) 3127 3128 if not self._match(TokenType.GT): 3129 self.raise_error("Expecting >") 3130 3131 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3132 values = self._parse_csv(self._parse_conjunction) 3133 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3134 3135 value: t.Optional[exp.Expression] = None 3136 if type_token in self.TIMESTAMPS: 3137 if self._match_text_seq("WITH", "TIME", "ZONE"): 3138 maybe_func = False 3139 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3140 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3141 maybe_func = False 3142 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3143 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3144 maybe_func = False 3145 elif type_token == TokenType.INTERVAL: 3146 unit = self._parse_var() 3147 3148 if not unit: 3149 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3150 else: 3151 value = self.expression(exp.Interval, unit=unit) 3152 3153 if maybe_func and check_func: 3154 index2 = self._index 3155 peek = self._parse_string() 3156 3157 if not peek: 3158 self._retreat(index) 3159 return None 3160 3161 self._retreat(index2) 3162 3163 if value: 3164 return value 3165 3166 return exp.DataType( 3167 this=exp.DataType.Type[type_token.value], 3168 expressions=expressions, 3169 nested=nested, 3170 values=values, 3171 prefix=prefix, 3172 ) 3173 3174 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3175 this = self._parse_type() or self._parse_id_var() 3176 self._match(TokenType.COLON) 3177 return self._parse_column_def(this) 3178 3179 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3180 if not self._match_text_seq("AT", "TIME", "ZONE"): 3181 return this 3182 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3183 3184 def _parse_column(self) -> t.Optional[exp.Expression]: 3185 this = self._parse_field() 3186 if isinstance(this, exp.Identifier): 3187 this = self.expression(exp.Column, this=this) 3188 elif not this: 3189 return self._parse_bracket(this) 3190 return self._parse_column_ops(this) 3191 3192 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3193 this = self._parse_bracket(this) 3194 3195 while self._match_set(self.COLUMN_OPERATORS): 3196 op_token = self._prev.token_type 3197 op = self.COLUMN_OPERATORS.get(op_token) 3198 3199 if op_token == TokenType.DCOLON: 3200 field = self._parse_types() 3201 if not field: 3202 self.raise_error("Expected type") 3203 elif op and self._curr: 3204 self._advance() 3205 value = self._prev.text 3206 field = ( 3207 exp.Literal.number(value) 3208 if self._prev.token_type == TokenType.NUMBER 3209 else exp.Literal.string(value) 3210 ) 3211 else: 3212 field = self._parse_field(anonymous_func=True, any_token=True) 3213 3214 if isinstance(field, exp.Func): 3215 # bigquery allows function calls like x.y.count(...) 3216 # SAFE.SUBSTR(...) 3217 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3218 this = self._replace_columns_with_dots(this) 3219 3220 if op: 3221 this = op(self, this, field) 3222 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3223 this = self.expression( 3224 exp.Column, 3225 this=field, 3226 table=this.this, 3227 db=this.args.get("table"), 3228 catalog=this.args.get("db"), 3229 ) 3230 else: 3231 this = self.expression(exp.Dot, this=this, expression=field) 3232 this = self._parse_bracket(this) 3233 return this 3234 3235 def _parse_primary(self) -> t.Optional[exp.Expression]: 3236 if self._match_set(self.PRIMARY_PARSERS): 3237 token_type = self._prev.token_type 3238 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3239 3240 if token_type == TokenType.STRING: 3241 expressions = [primary] 3242 while self._match(TokenType.STRING): 3243 expressions.append(exp.Literal.string(self._prev.text)) 3244 3245 if len(expressions) > 1: 3246 return self.expression(exp.Concat, expressions=expressions) 3247 3248 return primary 3249 3250 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3251 return exp.Literal.number(f"0.{self._prev.text}") 3252 3253 if self._match(TokenType.L_PAREN): 3254 comments = self._prev_comments 3255 query = self._parse_select() 3256 3257 if query: 3258 expressions = [query] 3259 else: 3260 expressions = self._parse_expressions() 3261 3262 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3263 3264 if isinstance(this, exp.Subqueryable): 3265 this = self._parse_set_operations( 3266 self._parse_subquery(this=this, parse_alias=False) 3267 ) 3268 elif len(expressions) > 1: 3269 this = self.expression(exp.Tuple, expressions=expressions) 3270 else: 3271 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3272 3273 if this: 3274 this.add_comments(comments) 3275 3276 self._match_r_paren(expression=this) 3277 return this 3278 3279 return None 3280 3281 def _parse_field( 3282 self, 3283 any_token: bool = False, 3284 tokens: t.Optional[t.Collection[TokenType]] = None, 3285 anonymous_func: bool = False, 3286 ) -> t.Optional[exp.Expression]: 3287 return ( 3288 self._parse_primary() 3289 or self._parse_function(anonymous=anonymous_func) 3290 or self._parse_id_var(any_token=any_token, tokens=tokens) 3291 ) 3292 3293 def _parse_function( 3294 self, 3295 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3296 anonymous: bool = False, 3297 optional_parens: bool = True, 3298 ) -> t.Optional[exp.Expression]: 3299 if not self._curr: 3300 return None 3301 3302 token_type = self._curr.token_type 3303 3304 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3305 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3306 3307 if not self._next or self._next.token_type != TokenType.L_PAREN: 3308 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3309 self._advance() 3310 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3311 3312 return None 3313 3314 if token_type not in self.FUNC_TOKENS: 3315 return None 3316 3317 this = self._curr.text 3318 upper = this.upper() 3319 self._advance(2) 3320 3321 parser = self.FUNCTION_PARSERS.get(upper) 3322 3323 if parser and not anonymous: 3324 this = parser(self) 3325 else: 3326 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3327 3328 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3329 this = self.expression(subquery_predicate, this=self._parse_select()) 3330 self._match_r_paren() 3331 return this 3332 3333 if functions is None: 3334 functions = self.FUNCTIONS 3335 3336 function = functions.get(upper) 3337 3338 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3339 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3340 3341 if function and not anonymous: 3342 this = self.validate_expression(function(args), args) 3343 else: 3344 this = self.expression(exp.Anonymous, this=this, expressions=args) 3345 3346 self._match(TokenType.R_PAREN, expression=this) 3347 return self._parse_window(this) 3348 3349 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3350 return self._parse_column_def(self._parse_id_var()) 3351 3352 def _parse_user_defined_function( 3353 self, kind: t.Optional[TokenType] = None 3354 ) -> t.Optional[exp.Expression]: 3355 this = self._parse_id_var() 3356 3357 while self._match(TokenType.DOT): 3358 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3359 3360 if not self._match(TokenType.L_PAREN): 3361 return this 3362 3363 expressions = self._parse_csv(self._parse_function_parameter) 3364 self._match_r_paren() 3365 return self.expression( 3366 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3367 ) 3368 3369 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3370 literal = self._parse_primary() 3371 if literal: 3372 return self.expression(exp.Introducer, this=token.text, expression=literal) 3373 3374 return self.expression(exp.Identifier, this=token.text) 3375 3376 def _parse_session_parameter(self) -> exp.SessionParameter: 3377 kind = None 3378 this = self._parse_id_var() or self._parse_primary() 3379 3380 if this and self._match(TokenType.DOT): 3381 kind = this.name 3382 this = self._parse_var() or self._parse_primary() 3383 3384 return self.expression(exp.SessionParameter, this=this, kind=kind) 3385 3386 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3387 index = self._index 3388 3389 if self._match(TokenType.L_PAREN): 3390 expressions = self._parse_csv(self._parse_id_var) 3391 3392 if not self._match(TokenType.R_PAREN): 3393 self._retreat(index) 3394 else: 3395 expressions = [self._parse_id_var()] 3396 3397 if self._match_set(self.LAMBDAS): 3398 return self.LAMBDAS[self._prev.token_type](self, expressions) 3399 3400 self._retreat(index) 3401 3402 this: t.Optional[exp.Expression] 3403 3404 if self._match(TokenType.DISTINCT): 3405 this = self.expression( 3406 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3407 ) 3408 else: 3409 this = self._parse_select_or_expression(alias=alias) 3410 3411 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3412 3413 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3414 index = self._index 3415 3416 if not self.errors: 3417 try: 3418 if self._parse_select(nested=True): 3419 return this 3420 except ParseError: 3421 pass 3422 finally: 3423 self.errors.clear() 3424 self._retreat(index) 3425 3426 if not self._match(TokenType.L_PAREN): 3427 return this 3428 3429 args = self._parse_csv( 3430 lambda: self._parse_constraint() 3431 or self._parse_column_def(self._parse_field(any_token=True)) 3432 ) 3433 3434 self._match_r_paren() 3435 return self.expression(exp.Schema, this=this, expressions=args) 3436 3437 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3438 # column defs are not really columns, they're identifiers 3439 if isinstance(this, exp.Column): 3440 this = this.this 3441 3442 kind = self._parse_types(schema=True) 3443 3444 if self._match_text_seq("FOR", "ORDINALITY"): 3445 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3446 3447 constraints = [] 3448 while True: 3449 constraint = self._parse_column_constraint() 3450 if not constraint: 3451 break 3452 constraints.append(constraint) 3453 3454 if not kind and not constraints: 3455 return this 3456 3457 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3458 3459 def _parse_auto_increment( 3460 self, 3461 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3462 start = None 3463 increment = None 3464 3465 if self._match(TokenType.L_PAREN, advance=False): 3466 args = self._parse_wrapped_csv(self._parse_bitwise) 3467 start = seq_get(args, 0) 3468 increment = seq_get(args, 1) 3469 elif self._match_text_seq("START"): 3470 start = self._parse_bitwise() 3471 self._match_text_seq("INCREMENT") 3472 increment = self._parse_bitwise() 3473 3474 if start and increment: 3475 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3476 3477 return exp.AutoIncrementColumnConstraint() 3478 3479 def _parse_compress(self) -> exp.CompressColumnConstraint: 3480 if self._match(TokenType.L_PAREN, advance=False): 3481 return self.expression( 3482 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3483 ) 3484 3485 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3486 3487 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3488 if self._match_text_seq("BY", "DEFAULT"): 3489 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3490 this = self.expression( 3491 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3492 ) 3493 else: 3494 self._match_text_seq("ALWAYS") 3495 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3496 3497 self._match(TokenType.ALIAS) 3498 identity = self._match_text_seq("IDENTITY") 3499 3500 if self._match(TokenType.L_PAREN): 3501 if self._match_text_seq("START", "WITH"): 3502 this.set("start", self._parse_bitwise()) 3503 if self._match_text_seq("INCREMENT", "BY"): 3504 this.set("increment", self._parse_bitwise()) 3505 if self._match_text_seq("MINVALUE"): 3506 this.set("minvalue", self._parse_bitwise()) 3507 if self._match_text_seq("MAXVALUE"): 3508 this.set("maxvalue", self._parse_bitwise()) 3509 3510 if self._match_text_seq("CYCLE"): 3511 this.set("cycle", True) 3512 elif self._match_text_seq("NO", "CYCLE"): 3513 this.set("cycle", False) 3514 3515 if not identity: 3516 this.set("expression", self._parse_bitwise()) 3517 3518 self._match_r_paren() 3519 3520 return this 3521 3522 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3523 self._match_text_seq("LENGTH") 3524 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3525 3526 def _parse_not_constraint( 3527 self, 3528 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3529 if self._match_text_seq("NULL"): 3530 return self.expression(exp.NotNullColumnConstraint) 3531 if self._match_text_seq("CASESPECIFIC"): 3532 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3533 return None 3534 3535 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3536 if self._match(TokenType.CONSTRAINT): 3537 this = self._parse_id_var() 3538 else: 3539 this = None 3540 3541 if self._match_texts(self.CONSTRAINT_PARSERS): 3542 return self.expression( 3543 exp.ColumnConstraint, 3544 this=this, 3545 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3546 ) 3547 3548 return this 3549 3550 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3551 if not self._match(TokenType.CONSTRAINT): 3552 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3553 3554 this = self._parse_id_var() 3555 expressions = [] 3556 3557 while True: 3558 constraint = self._parse_unnamed_constraint() or self._parse_function() 3559 if not constraint: 3560 break 3561 expressions.append(constraint) 3562 3563 return self.expression(exp.Constraint, this=this, expressions=expressions) 3564 3565 def _parse_unnamed_constraint( 3566 self, constraints: t.Optional[t.Collection[str]] = None 3567 ) -> t.Optional[exp.Expression]: 3568 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3569 return None 3570 3571 constraint = self._prev.text.upper() 3572 if constraint not in self.CONSTRAINT_PARSERS: 3573 self.raise_error(f"No parser found for schema constraint {constraint}.") 3574 3575 return self.CONSTRAINT_PARSERS[constraint](self) 3576 3577 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3578 self._match_text_seq("KEY") 3579 return self.expression( 3580 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3581 ) 3582 3583 def _parse_key_constraint_options(self) -> t.List[str]: 3584 options = [] 3585 while True: 3586 if not self._curr: 3587 break 3588 3589 if self._match(TokenType.ON): 3590 action = None 3591 on = self._advance_any() and self._prev.text 3592 3593 if self._match_text_seq("NO", "ACTION"): 3594 action = "NO ACTION" 3595 elif self._match_text_seq("CASCADE"): 3596 action = "CASCADE" 3597 elif self._match_pair(TokenType.SET, TokenType.NULL): 3598 action = "SET NULL" 3599 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3600 action = "SET DEFAULT" 3601 else: 3602 self.raise_error("Invalid key constraint") 3603 3604 options.append(f"ON {on} {action}") 3605 elif self._match_text_seq("NOT", "ENFORCED"): 3606 options.append("NOT ENFORCED") 3607 elif self._match_text_seq("DEFERRABLE"): 3608 options.append("DEFERRABLE") 3609 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3610 options.append("INITIALLY DEFERRED") 3611 elif self._match_text_seq("NORELY"): 3612 options.append("NORELY") 3613 elif self._match_text_seq("MATCH", "FULL"): 3614 options.append("MATCH FULL") 3615 else: 3616 break 3617 3618 return options 3619 3620 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3621 if match and not self._match(TokenType.REFERENCES): 3622 return None 3623 3624 expressions = None 3625 this = self._parse_table(schema=True) 3626 options = self._parse_key_constraint_options() 3627 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3628 3629 def _parse_foreign_key(self) -> exp.ForeignKey: 3630 expressions = self._parse_wrapped_id_vars() 3631 reference = self._parse_references() 3632 options = {} 3633 3634 while self._match(TokenType.ON): 3635 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3636 self.raise_error("Expected DELETE or UPDATE") 3637 3638 kind = self._prev.text.lower() 3639 3640 if self._match_text_seq("NO", "ACTION"): 3641 action = "NO ACTION" 3642 elif self._match(TokenType.SET): 3643 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3644 action = "SET " + self._prev.text.upper() 3645 else: 3646 self._advance() 3647 action = self._prev.text.upper() 3648 3649 options[kind] = action 3650 3651 return self.expression( 3652 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3653 ) 3654 3655 def _parse_primary_key( 3656 self, wrapped_optional: bool = False, in_props: bool = False 3657 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3658 desc = ( 3659 self._match_set((TokenType.ASC, TokenType.DESC)) 3660 and self._prev.token_type == TokenType.DESC 3661 ) 3662 3663 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3664 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3665 3666 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3667 options = self._parse_key_constraint_options() 3668 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3669 3670 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3671 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3672 return this 3673 3674 bracket_kind = self._prev.token_type 3675 3676 if self._match(TokenType.COLON): 3677 expressions: t.List[t.Optional[exp.Expression]] = [ 3678 self.expression(exp.Slice, expression=self._parse_conjunction()) 3679 ] 3680 else: 3681 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3682 3683 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3684 if bracket_kind == TokenType.L_BRACE: 3685 this = self.expression(exp.Struct, expressions=expressions) 3686 elif not this or this.name.upper() == "ARRAY": 3687 this = self.expression(exp.Array, expressions=expressions) 3688 else: 3689 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3690 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3691 3692 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3693 self.raise_error("Expected ]") 3694 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3695 self.raise_error("Expected }") 3696 3697 self._add_comments(this) 3698 return self._parse_bracket(this) 3699 3700 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3701 if self._match(TokenType.COLON): 3702 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3703 return this 3704 3705 def _parse_case(self) -> t.Optional[exp.Expression]: 3706 ifs = [] 3707 default = None 3708 3709 expression = self._parse_conjunction() 3710 3711 while self._match(TokenType.WHEN): 3712 this = self._parse_conjunction() 3713 self._match(TokenType.THEN) 3714 then = self._parse_conjunction() 3715 ifs.append(self.expression(exp.If, this=this, true=then)) 3716 3717 if self._match(TokenType.ELSE): 3718 default = self._parse_conjunction() 3719 3720 if not self._match(TokenType.END): 3721 self.raise_error("Expected END after CASE", self._prev) 3722 3723 return self._parse_window( 3724 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3725 ) 3726 3727 def _parse_if(self) -> t.Optional[exp.Expression]: 3728 if self._match(TokenType.L_PAREN): 3729 args = self._parse_csv(self._parse_conjunction) 3730 this = self.validate_expression(exp.If.from_arg_list(args), args) 3731 self._match_r_paren() 3732 else: 3733 index = self._index - 1 3734 condition = self._parse_conjunction() 3735 3736 if not condition: 3737 self._retreat(index) 3738 return None 3739 3740 self._match(TokenType.THEN) 3741 true = self._parse_conjunction() 3742 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3743 self._match(TokenType.END) 3744 this = self.expression(exp.If, this=condition, true=true, false=false) 3745 3746 return self._parse_window(this) 3747 3748 def _parse_extract(self) -> exp.Extract: 3749 this = self._parse_function() or self._parse_var() or self._parse_type() 3750 3751 if self._match(TokenType.FROM): 3752 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3753 3754 if not self._match(TokenType.COMMA): 3755 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3756 3757 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3758 3759 def _parse_any_value(self) -> exp.AnyValue: 3760 this = self._parse_lambda() 3761 is_max = None 3762 having = None 3763 3764 if self._match(TokenType.HAVING): 3765 self._match_texts(("MAX", "MIN")) 3766 is_max = self._prev.text == "MAX" 3767 having = self._parse_column() 3768 3769 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3770 3771 def _parse_cast(self, strict: bool) -> exp.Expression: 3772 this = self._parse_conjunction() 3773 3774 if not self._match(TokenType.ALIAS): 3775 if self._match(TokenType.COMMA): 3776 return self.expression( 3777 exp.CastToStrType, this=this, expression=self._parse_string() 3778 ) 3779 else: 3780 self.raise_error("Expected AS after CAST") 3781 3782 fmt = None 3783 to = self._parse_types() 3784 3785 if not to: 3786 self.raise_error("Expected TYPE after CAST") 3787 elif to.this == exp.DataType.Type.CHAR: 3788 if self._match(TokenType.CHARACTER_SET): 3789 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3790 elif self._match(TokenType.FORMAT): 3791 fmt_string = self._parse_string() 3792 fmt = self._parse_at_time_zone(fmt_string) 3793 3794 if to.this in exp.DataType.TEMPORAL_TYPES: 3795 this = self.expression( 3796 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3797 this=this, 3798 format=exp.Literal.string( 3799 format_time( 3800 fmt_string.this if fmt_string else "", 3801 self.FORMAT_MAPPING or self.TIME_MAPPING, 3802 self.FORMAT_TRIE or self.TIME_TRIE, 3803 ) 3804 ), 3805 ) 3806 3807 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3808 this.set("zone", fmt.args["zone"]) 3809 3810 return this 3811 3812 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3813 3814 def _parse_concat(self) -> t.Optional[exp.Expression]: 3815 args = self._parse_csv(self._parse_conjunction) 3816 if self.CONCAT_NULL_OUTPUTS_STRING: 3817 args = [ 3818 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3819 for arg in args 3820 if arg 3821 ] 3822 3823 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3824 # we find such a call we replace it with its argument. 3825 if len(args) == 1: 3826 return args[0] 3827 3828 return self.expression( 3829 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3830 ) 3831 3832 def _parse_string_agg(self) -> exp.Expression: 3833 if self._match(TokenType.DISTINCT): 3834 args: t.List[t.Optional[exp.Expression]] = [ 3835 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3836 ] 3837 if self._match(TokenType.COMMA): 3838 args.extend(self._parse_csv(self._parse_conjunction)) 3839 else: 3840 args = self._parse_csv(self._parse_conjunction) 3841 3842 index = self._index 3843 if not self._match(TokenType.R_PAREN): 3844 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3845 return self.expression( 3846 exp.GroupConcat, 3847 this=seq_get(args, 0), 3848 separator=self._parse_order(this=seq_get(args, 1)), 3849 ) 3850 3851 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3852 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3853 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3854 if not self._match_text_seq("WITHIN", "GROUP"): 3855 self._retreat(index) 3856 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3857 3858 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3859 order = self._parse_order(this=seq_get(args, 0)) 3860 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3861 3862 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3863 this = self._parse_bitwise() 3864 3865 if self._match(TokenType.USING): 3866 to: t.Optional[exp.Expression] = self.expression( 3867 exp.CharacterSet, this=self._parse_var() 3868 ) 3869 elif self._match(TokenType.COMMA): 3870 to = self._parse_types() 3871 else: 3872 to = None 3873 3874 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3875 3876 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3877 """ 3878 There are generally two variants of the DECODE function: 3879 3880 - DECODE(bin, charset) 3881 - DECODE(expression, search, result [, search, result] ... [, default]) 3882 3883 The second variant will always be parsed into a CASE expression. Note that NULL 3884 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3885 instead of relying on pattern matching. 3886 """ 3887 args = self._parse_csv(self._parse_conjunction) 3888 3889 if len(args) < 3: 3890 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3891 3892 expression, *expressions = args 3893 if not expression: 3894 return None 3895 3896 ifs = [] 3897 for search, result in zip(expressions[::2], expressions[1::2]): 3898 if not search or not result: 3899 return None 3900 3901 if isinstance(search, exp.Literal): 3902 ifs.append( 3903 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3904 ) 3905 elif isinstance(search, exp.Null): 3906 ifs.append( 3907 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3908 ) 3909 else: 3910 cond = exp.or_( 3911 exp.EQ(this=expression.copy(), expression=search), 3912 exp.and_( 3913 exp.Is(this=expression.copy(), expression=exp.Null()), 3914 exp.Is(this=search.copy(), expression=exp.Null()), 3915 copy=False, 3916 ), 3917 copy=False, 3918 ) 3919 ifs.append(exp.If(this=cond, true=result)) 3920 3921 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3922 3923 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3924 self._match_text_seq("KEY") 3925 key = self._parse_field() 3926 self._match(TokenType.COLON) 3927 self._match_text_seq("VALUE") 3928 value = self._parse_field() 3929 3930 if not key and not value: 3931 return None 3932 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3933 3934 def _parse_json_object(self) -> exp.JSONObject: 3935 star = self._parse_star() 3936 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3937 3938 null_handling = None 3939 if self._match_text_seq("NULL", "ON", "NULL"): 3940 null_handling = "NULL ON NULL" 3941 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3942 null_handling = "ABSENT ON NULL" 3943 3944 unique_keys = None 3945 if self._match_text_seq("WITH", "UNIQUE"): 3946 unique_keys = True 3947 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3948 unique_keys = False 3949 3950 self._match_text_seq("KEYS") 3951 3952 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3953 format_json = self._match_text_seq("FORMAT", "JSON") 3954 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3955 3956 return self.expression( 3957 exp.JSONObject, 3958 expressions=expressions, 3959 null_handling=null_handling, 3960 unique_keys=unique_keys, 3961 return_type=return_type, 3962 format_json=format_json, 3963 encoding=encoding, 3964 ) 3965 3966 def _parse_logarithm(self) -> exp.Func: 3967 # Default argument order is base, expression 3968 args = self._parse_csv(self._parse_range) 3969 3970 if len(args) > 1: 3971 if not self.LOG_BASE_FIRST: 3972 args.reverse() 3973 return exp.Log.from_arg_list(args) 3974 3975 return self.expression( 3976 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3977 ) 3978 3979 def _parse_match_against(self) -> exp.MatchAgainst: 3980 expressions = self._parse_csv(self._parse_column) 3981 3982 self._match_text_seq(")", "AGAINST", "(") 3983 3984 this = self._parse_string() 3985 3986 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3987 modifier = "IN NATURAL LANGUAGE MODE" 3988 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3989 modifier = f"{modifier} WITH QUERY EXPANSION" 3990 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3991 modifier = "IN BOOLEAN MODE" 3992 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3993 modifier = "WITH QUERY EXPANSION" 3994 else: 3995 modifier = None 3996 3997 return self.expression( 3998 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3999 ) 4000 4001 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4002 def _parse_open_json(self) -> exp.OpenJSON: 4003 this = self._parse_bitwise() 4004 path = self._match(TokenType.COMMA) and self._parse_string() 4005 4006 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4007 this = self._parse_field(any_token=True) 4008 kind = self._parse_types() 4009 path = self._parse_string() 4010 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4011 4012 return self.expression( 4013 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4014 ) 4015 4016 expressions = None 4017 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4018 self._match_l_paren() 4019 expressions = self._parse_csv(_parse_open_json_column_def) 4020 4021 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4022 4023 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4024 args = self._parse_csv(self._parse_bitwise) 4025 4026 if self._match(TokenType.IN): 4027 return self.expression( 4028 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4029 ) 4030 4031 if haystack_first: 4032 haystack = seq_get(args, 0) 4033 needle = seq_get(args, 1) 4034 else: 4035 needle = seq_get(args, 0) 4036 haystack = seq_get(args, 1) 4037 4038 return self.expression( 4039 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4040 ) 4041 4042 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4043 args = self._parse_csv(self._parse_table) 4044 return exp.JoinHint(this=func_name.upper(), expressions=args) 4045 4046 def _parse_substring(self) -> exp.Substring: 4047 # Postgres supports the form: substring(string [from int] [for int]) 4048 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4049 4050 args = self._parse_csv(self._parse_bitwise) 4051 4052 if self._match(TokenType.FROM): 4053 args.append(self._parse_bitwise()) 4054 if self._match(TokenType.FOR): 4055 args.append(self._parse_bitwise()) 4056 4057 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4058 4059 def _parse_trim(self) -> exp.Trim: 4060 # https://www.w3resource.com/sql/character-functions/trim.php 4061 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4062 4063 position = None 4064 collation = None 4065 4066 if self._match_texts(self.TRIM_TYPES): 4067 position = self._prev.text.upper() 4068 4069 expression = self._parse_bitwise() 4070 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4071 this = self._parse_bitwise() 4072 else: 4073 this = expression 4074 expression = None 4075 4076 if self._match(TokenType.COLLATE): 4077 collation = self._parse_bitwise() 4078 4079 return self.expression( 4080 exp.Trim, this=this, position=position, expression=expression, collation=collation 4081 ) 4082 4083 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4084 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4085 4086 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4087 return self._parse_window(self._parse_id_var(), alias=True) 4088 4089 def _parse_respect_or_ignore_nulls( 4090 self, this: t.Optional[exp.Expression] 4091 ) -> t.Optional[exp.Expression]: 4092 if self._match_text_seq("IGNORE", "NULLS"): 4093 return self.expression(exp.IgnoreNulls, this=this) 4094 if self._match_text_seq("RESPECT", "NULLS"): 4095 return self.expression(exp.RespectNulls, this=this) 4096 return this 4097 4098 def _parse_window( 4099 self, this: t.Optional[exp.Expression], alias: bool = False 4100 ) -> t.Optional[exp.Expression]: 4101 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4102 self._match(TokenType.WHERE) 4103 this = self.expression( 4104 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4105 ) 4106 self._match_r_paren() 4107 4108 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4109 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4110 if self._match_text_seq("WITHIN", "GROUP"): 4111 order = self._parse_wrapped(self._parse_order) 4112 this = self.expression(exp.WithinGroup, this=this, expression=order) 4113 4114 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4115 # Some dialects choose to implement and some do not. 4116 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4117 4118 # There is some code above in _parse_lambda that handles 4119 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4120 4121 # The below changes handle 4122 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4123 4124 # Oracle allows both formats 4125 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4126 # and Snowflake chose to do the same for familiarity 4127 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4128 this = self._parse_respect_or_ignore_nulls(this) 4129 4130 # bigquery select from window x AS (partition by ...) 4131 if alias: 4132 over = None 4133 self._match(TokenType.ALIAS) 4134 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4135 return this 4136 else: 4137 over = self._prev.text.upper() 4138 4139 if not self._match(TokenType.L_PAREN): 4140 return self.expression( 4141 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4142 ) 4143 4144 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4145 4146 first = self._match(TokenType.FIRST) 4147 if self._match_text_seq("LAST"): 4148 first = False 4149 4150 partition = self._parse_partition_by() 4151 order = self._parse_order() 4152 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4153 4154 if kind: 4155 self._match(TokenType.BETWEEN) 4156 start = self._parse_window_spec() 4157 self._match(TokenType.AND) 4158 end = self._parse_window_spec() 4159 4160 spec = self.expression( 4161 exp.WindowSpec, 4162 kind=kind, 4163 start=start["value"], 4164 start_side=start["side"], 4165 end=end["value"], 4166 end_side=end["side"], 4167 ) 4168 else: 4169 spec = None 4170 4171 self._match_r_paren() 4172 4173 return self.expression( 4174 exp.Window, 4175 this=this, 4176 partition_by=partition, 4177 order=order, 4178 spec=spec, 4179 alias=window_alias, 4180 over=over, 4181 first=first, 4182 ) 4183 4184 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4185 self._match(TokenType.BETWEEN) 4186 4187 return { 4188 "value": ( 4189 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4190 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4191 or self._parse_bitwise() 4192 ), 4193 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4194 } 4195 4196 def _parse_alias( 4197 self, this: t.Optional[exp.Expression], explicit: bool = False 4198 ) -> t.Optional[exp.Expression]: 4199 any_token = self._match(TokenType.ALIAS) 4200 4201 if explicit and not any_token: 4202 return this 4203 4204 if self._match(TokenType.L_PAREN): 4205 aliases = self.expression( 4206 exp.Aliases, 4207 this=this, 4208 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4209 ) 4210 self._match_r_paren(aliases) 4211 return aliases 4212 4213 alias = self._parse_id_var(any_token) 4214 4215 if alias: 4216 return self.expression(exp.Alias, this=this, alias=alias) 4217 4218 return this 4219 4220 def _parse_id_var( 4221 self, 4222 any_token: bool = True, 4223 tokens: t.Optional[t.Collection[TokenType]] = None, 4224 ) -> t.Optional[exp.Expression]: 4225 identifier = self._parse_identifier() 4226 4227 if identifier: 4228 return identifier 4229 4230 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4231 quoted = self._prev.token_type == TokenType.STRING 4232 return exp.Identifier(this=self._prev.text, quoted=quoted) 4233 4234 return None 4235 4236 def _parse_string(self) -> t.Optional[exp.Expression]: 4237 if self._match(TokenType.STRING): 4238 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4239 return self._parse_placeholder() 4240 4241 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4242 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4243 4244 def _parse_number(self) -> t.Optional[exp.Expression]: 4245 if self._match(TokenType.NUMBER): 4246 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4247 return self._parse_placeholder() 4248 4249 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4250 if self._match(TokenType.IDENTIFIER): 4251 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4252 return self._parse_placeholder() 4253 4254 def _parse_var( 4255 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4256 ) -> t.Optional[exp.Expression]: 4257 if ( 4258 (any_token and self._advance_any()) 4259 or self._match(TokenType.VAR) 4260 or (self._match_set(tokens) if tokens else False) 4261 ): 4262 return self.expression(exp.Var, this=self._prev.text) 4263 return self._parse_placeholder() 4264 4265 def _advance_any(self) -> t.Optional[Token]: 4266 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4267 self._advance() 4268 return self._prev 4269 return None 4270 4271 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4272 return self._parse_var() or self._parse_string() 4273 4274 def _parse_null(self) -> t.Optional[exp.Expression]: 4275 if self._match(TokenType.NULL): 4276 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4277 return None 4278 4279 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4280 if self._match(TokenType.TRUE): 4281 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4282 if self._match(TokenType.FALSE): 4283 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4284 return None 4285 4286 def _parse_star(self) -> t.Optional[exp.Expression]: 4287 if self._match(TokenType.STAR): 4288 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4289 return None 4290 4291 def _parse_parameter(self) -> exp.Parameter: 4292 wrapped = self._match(TokenType.L_BRACE) 4293 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4294 self._match(TokenType.R_BRACE) 4295 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4296 4297 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4298 if self._match_set(self.PLACEHOLDER_PARSERS): 4299 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4300 if placeholder: 4301 return placeholder 4302 self._advance(-1) 4303 return None 4304 4305 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4306 if not self._match(TokenType.EXCEPT): 4307 return None 4308 if self._match(TokenType.L_PAREN, advance=False): 4309 return self._parse_wrapped_csv(self._parse_column) 4310 return self._parse_csv(self._parse_column) 4311 4312 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4313 if not self._match(TokenType.REPLACE): 4314 return None 4315 if self._match(TokenType.L_PAREN, advance=False): 4316 return self._parse_wrapped_csv(self._parse_expression) 4317 return self._parse_expressions() 4318 4319 def _parse_csv( 4320 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4321 ) -> t.List[t.Optional[exp.Expression]]: 4322 parse_result = parse_method() 4323 items = [parse_result] if parse_result is not None else [] 4324 4325 while self._match(sep): 4326 self._add_comments(parse_result) 4327 parse_result = parse_method() 4328 if parse_result is not None: 4329 items.append(parse_result) 4330 4331 return items 4332 4333 def _parse_tokens( 4334 self, parse_method: t.Callable, expressions: t.Dict 4335 ) -> t.Optional[exp.Expression]: 4336 this = parse_method() 4337 4338 while self._match_set(expressions): 4339 this = self.expression( 4340 expressions[self._prev.token_type], 4341 this=this, 4342 comments=self._prev_comments, 4343 expression=parse_method(), 4344 ) 4345 4346 return this 4347 4348 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4349 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4350 4351 def _parse_wrapped_csv( 4352 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4353 ) -> t.List[t.Optional[exp.Expression]]: 4354 return self._parse_wrapped( 4355 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4356 ) 4357 4358 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4359 wrapped = self._match(TokenType.L_PAREN) 4360 if not wrapped and not optional: 4361 self.raise_error("Expecting (") 4362 parse_result = parse_method() 4363 if wrapped: 4364 self._match_r_paren() 4365 return parse_result 4366 4367 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4368 return self._parse_csv(self._parse_expression) 4369 4370 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4371 return self._parse_select() or self._parse_set_operations( 4372 self._parse_expression() if alias else self._parse_conjunction() 4373 ) 4374 4375 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4376 return self._parse_query_modifiers( 4377 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4378 ) 4379 4380 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4381 this = None 4382 if self._match_texts(self.TRANSACTION_KIND): 4383 this = self._prev.text 4384 4385 self._match_texts({"TRANSACTION", "WORK"}) 4386 4387 modes = [] 4388 while True: 4389 mode = [] 4390 while self._match(TokenType.VAR): 4391 mode.append(self._prev.text) 4392 4393 if mode: 4394 modes.append(" ".join(mode)) 4395 if not self._match(TokenType.COMMA): 4396 break 4397 4398 return self.expression(exp.Transaction, this=this, modes=modes) 4399 4400 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4401 chain = None 4402 savepoint = None 4403 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4404 4405 self._match_texts({"TRANSACTION", "WORK"}) 4406 4407 if self._match_text_seq("TO"): 4408 self._match_text_seq("SAVEPOINT") 4409 savepoint = self._parse_id_var() 4410 4411 if self._match(TokenType.AND): 4412 chain = not self._match_text_seq("NO") 4413 self._match_text_seq("CHAIN") 4414 4415 if is_rollback: 4416 return self.expression(exp.Rollback, savepoint=savepoint) 4417 4418 return self.expression(exp.Commit, chain=chain) 4419 4420 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4421 if not self._match_text_seq("ADD"): 4422 return None 4423 4424 self._match(TokenType.COLUMN) 4425 exists_column = self._parse_exists(not_=True) 4426 expression = self._parse_column_def(self._parse_field(any_token=True)) 4427 4428 if expression: 4429 expression.set("exists", exists_column) 4430 4431 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4432 if self._match_texts(("FIRST", "AFTER")): 4433 position = self._prev.text 4434 column_position = self.expression( 4435 exp.ColumnPosition, this=self._parse_column(), position=position 4436 ) 4437 expression.set("position", column_position) 4438 4439 return expression 4440 4441 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4442 drop = self._match(TokenType.DROP) and self._parse_drop() 4443 if drop and not isinstance(drop, exp.Command): 4444 drop.set("kind", drop.args.get("kind", "COLUMN")) 4445 return drop 4446 4447 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4448 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4449 return self.expression( 4450 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4451 ) 4452 4453 def _parse_add_constraint(self) -> exp.AddConstraint: 4454 this = None 4455 kind = self._prev.token_type 4456 4457 if kind == TokenType.CONSTRAINT: 4458 this = self._parse_id_var() 4459 4460 if self._match_text_seq("CHECK"): 4461 expression = self._parse_wrapped(self._parse_conjunction) 4462 enforced = self._match_text_seq("ENFORCED") 4463 4464 return self.expression( 4465 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4466 ) 4467 4468 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4469 expression = self._parse_foreign_key() 4470 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4471 expression = self._parse_primary_key() 4472 else: 4473 expression = None 4474 4475 return self.expression(exp.AddConstraint, this=this, expression=expression) 4476 4477 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4478 index = self._index - 1 4479 4480 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4481 return self._parse_csv(self._parse_add_constraint) 4482 4483 self._retreat(index) 4484 return self._parse_csv(self._parse_add_column) 4485 4486 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4487 self._match(TokenType.COLUMN) 4488 column = self._parse_field(any_token=True) 4489 4490 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4491 return self.expression(exp.AlterColumn, this=column, drop=True) 4492 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4493 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4494 4495 self._match_text_seq("SET", "DATA") 4496 return self.expression( 4497 exp.AlterColumn, 4498 this=column, 4499 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4500 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4501 using=self._match(TokenType.USING) and self._parse_conjunction(), 4502 ) 4503 4504 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4505 index = self._index - 1 4506 4507 partition_exists = self._parse_exists() 4508 if self._match(TokenType.PARTITION, advance=False): 4509 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4510 4511 self._retreat(index) 4512 return self._parse_csv(self._parse_drop_column) 4513 4514 def _parse_alter_table_rename(self) -> exp.RenameTable: 4515 self._match_text_seq("TO") 4516 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4517 4518 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4519 start = self._prev 4520 4521 if not self._match(TokenType.TABLE): 4522 return self._parse_as_command(start) 4523 4524 exists = self._parse_exists() 4525 this = self._parse_table(schema=True) 4526 4527 if self._next: 4528 self._advance() 4529 4530 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4531 if parser: 4532 actions = ensure_list(parser(self)) 4533 4534 if not self._curr: 4535 return self.expression( 4536 exp.AlterTable, 4537 this=this, 4538 exists=exists, 4539 actions=actions, 4540 ) 4541 return self._parse_as_command(start) 4542 4543 def _parse_merge(self) -> exp.Merge: 4544 self._match(TokenType.INTO) 4545 target = self._parse_table() 4546 4547 self._match(TokenType.USING) 4548 using = self._parse_table() 4549 4550 self._match(TokenType.ON) 4551 on = self._parse_conjunction() 4552 4553 whens = [] 4554 while self._match(TokenType.WHEN): 4555 matched = not self._match(TokenType.NOT) 4556 self._match_text_seq("MATCHED") 4557 source = ( 4558 False 4559 if self._match_text_seq("BY", "TARGET") 4560 else self._match_text_seq("BY", "SOURCE") 4561 ) 4562 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4563 4564 self._match(TokenType.THEN) 4565 4566 if self._match(TokenType.INSERT): 4567 _this = self._parse_star() 4568 if _this: 4569 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4570 else: 4571 then = self.expression( 4572 exp.Insert, 4573 this=self._parse_value(), 4574 expression=self._match(TokenType.VALUES) and self._parse_value(), 4575 ) 4576 elif self._match(TokenType.UPDATE): 4577 expressions = self._parse_star() 4578 if expressions: 4579 then = self.expression(exp.Update, expressions=expressions) 4580 else: 4581 then = self.expression( 4582 exp.Update, 4583 expressions=self._match(TokenType.SET) 4584 and self._parse_csv(self._parse_equality), 4585 ) 4586 elif self._match(TokenType.DELETE): 4587 then = self.expression(exp.Var, this=self._prev.text) 4588 else: 4589 then = None 4590 4591 whens.append( 4592 self.expression( 4593 exp.When, 4594 matched=matched, 4595 source=source, 4596 condition=condition, 4597 then=then, 4598 ) 4599 ) 4600 4601 return self.expression( 4602 exp.Merge, 4603 this=target, 4604 using=using, 4605 on=on, 4606 expressions=whens, 4607 ) 4608 4609 def _parse_show(self) -> t.Optional[exp.Expression]: 4610 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4611 if parser: 4612 return parser(self) 4613 self._advance() 4614 return self.expression(exp.Show, this=self._prev.text.upper()) 4615 4616 def _parse_set_item_assignment( 4617 self, kind: t.Optional[str] = None 4618 ) -> t.Optional[exp.Expression]: 4619 index = self._index 4620 4621 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4622 return self._parse_set_transaction(global_=kind == "GLOBAL") 4623 4624 left = self._parse_primary() or self._parse_id_var() 4625 4626 if not self._match_texts(("=", "TO")): 4627 self._retreat(index) 4628 return None 4629 4630 right = self._parse_statement() or self._parse_id_var() 4631 this = self.expression(exp.EQ, this=left, expression=right) 4632 4633 return self.expression(exp.SetItem, this=this, kind=kind) 4634 4635 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4636 self._match_text_seq("TRANSACTION") 4637 characteristics = self._parse_csv( 4638 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4639 ) 4640 return self.expression( 4641 exp.SetItem, 4642 expressions=characteristics, 4643 kind="TRANSACTION", 4644 **{"global": global_}, # type: ignore 4645 ) 4646 4647 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4648 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4649 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4650 4651 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4652 index = self._index 4653 set_ = self.expression( 4654 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4655 ) 4656 4657 if self._curr: 4658 self._retreat(index) 4659 return self._parse_as_command(self._prev) 4660 4661 return set_ 4662 4663 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4664 for option in options: 4665 if self._match_text_seq(*option.split(" ")): 4666 return exp.var(option) 4667 return None 4668 4669 def _parse_as_command(self, start: Token) -> exp.Command: 4670 while self._curr: 4671 self._advance() 4672 text = self._find_sql(start, self._prev) 4673 size = len(start.text) 4674 return exp.Command(this=text[:size], expression=text[size:]) 4675 4676 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4677 settings = [] 4678 4679 self._match_l_paren() 4680 kind = self._parse_id_var() 4681 4682 if self._match(TokenType.L_PAREN): 4683 while True: 4684 key = self._parse_id_var() 4685 value = self._parse_primary() 4686 4687 if not key and value is None: 4688 break 4689 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4690 self._match(TokenType.R_PAREN) 4691 4692 self._match_r_paren() 4693 4694 return self.expression( 4695 exp.DictProperty, 4696 this=this, 4697 kind=kind.this if kind else None, 4698 settings=settings, 4699 ) 4700 4701 def _parse_dict_range(self, this: str) -> exp.DictRange: 4702 self._match_l_paren() 4703 has_min = self._match_text_seq("MIN") 4704 if has_min: 4705 min = self._parse_var() or self._parse_primary() 4706 self._match_text_seq("MAX") 4707 max = self._parse_var() or self._parse_primary() 4708 else: 4709 max = self._parse_var() or self._parse_primary() 4710 min = exp.Literal.number(0) 4711 self._match_r_paren() 4712 return self.expression(exp.DictRange, this=this, min=min, max=max) 4713 4714 def _find_parser( 4715 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4716 ) -> t.Optional[t.Callable]: 4717 if not self._curr: 4718 return None 4719 4720 index = self._index 4721 this = [] 4722 while True: 4723 # The current token might be multiple words 4724 curr = self._curr.text.upper() 4725 key = curr.split(" ") 4726 this.append(curr) 4727 4728 self._advance() 4729 result, trie = in_trie(trie, key) 4730 if result == TrieResult.FAILED: 4731 break 4732 4733 if result == TrieResult.EXISTS: 4734 subparser = parsers[" ".join(this)] 4735 return subparser 4736 4737 self._retreat(index) 4738 return None 4739 4740 def _match(self, token_type, advance=True, expression=None): 4741 if not self._curr: 4742 return None 4743 4744 if self._curr.token_type == token_type: 4745 if advance: 4746 self._advance() 4747 self._add_comments(expression) 4748 return True 4749 4750 return None 4751 4752 def _match_set(self, types, advance=True): 4753 if not self._curr: 4754 return None 4755 4756 if self._curr.token_type in types: 4757 if advance: 4758 self._advance() 4759 return True 4760 4761 return None 4762 4763 def _match_pair(self, token_type_a, token_type_b, advance=True): 4764 if not self._curr or not self._next: 4765 return None 4766 4767 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4768 if advance: 4769 self._advance(2) 4770 return True 4771 4772 return None 4773 4774 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4775 if not self._match(TokenType.L_PAREN, expression=expression): 4776 self.raise_error("Expecting (") 4777 4778 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4779 if not self._match(TokenType.R_PAREN, expression=expression): 4780 self.raise_error("Expecting )") 4781 4782 def _match_texts(self, texts, advance=True): 4783 if self._curr and self._curr.text.upper() in texts: 4784 if advance: 4785 self._advance() 4786 return True 4787 return False 4788 4789 def _match_text_seq(self, *texts, advance=True): 4790 index = self._index 4791 for text in texts: 4792 if self._curr and self._curr.text.upper() == text: 4793 self._advance() 4794 else: 4795 self._retreat(index) 4796 return False 4797 4798 if not advance: 4799 self._retreat(index) 4800 4801 return True 4802 4803 @t.overload 4804 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4805 ... 4806 4807 @t.overload 4808 def _replace_columns_with_dots( 4809 self, this: t.Optional[exp.Expression] 4810 ) -> t.Optional[exp.Expression]: 4811 ... 4812 4813 def _replace_columns_with_dots(self, this): 4814 if isinstance(this, exp.Dot): 4815 exp.replace_children(this, self._replace_columns_with_dots) 4816 elif isinstance(this, exp.Column): 4817 exp.replace_children(this, self._replace_columns_with_dots) 4818 table = this.args.get("table") 4819 this = ( 4820 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4821 ) 4822 4823 return this 4824 4825 def _replace_lambda( 4826 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4827 ) -> t.Optional[exp.Expression]: 4828 if not node: 4829 return node 4830 4831 for column in node.find_all(exp.Column): 4832 if column.parts[0].name in lambda_variables: 4833 dot_or_id = column.to_dot() if column.table else column.this 4834 parent = column.parent 4835 4836 while isinstance(parent, exp.Dot): 4837 if not isinstance(parent.parent, exp.Dot): 4838 parent.replace(dot_or_id) 4839 break 4840 parent = parent.parent 4841 else: 4842 if column is node: 4843 node = dot_or_id 4844 else: 4845 column.replace(dot_or_id) 4846 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
844 def __init__( 845 self, 846 error_level: t.Optional[ErrorLevel] = None, 847 error_message_context: int = 100, 848 max_errors: int = 3, 849 ): 850 self.error_level = error_level or ErrorLevel.IMMEDIATE 851 self.error_message_context = error_message_context 852 self.max_errors = max_errors 853 self.reset()
865 def parse( 866 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 867 ) -> t.List[t.Optional[exp.Expression]]: 868 """ 869 Parses a list of tokens and returns a list of syntax trees, one tree 870 per parsed SQL statement. 871 872 Args: 873 raw_tokens: The list of tokens. 874 sql: The original SQL string, used to produce helpful debug messages. 875 876 Returns: 877 The list of the produced syntax trees. 878 """ 879 return self._parse( 880 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 881 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
883 def parse_into( 884 self, 885 expression_types: exp.IntoType, 886 raw_tokens: t.List[Token], 887 sql: t.Optional[str] = None, 888 ) -> t.List[t.Optional[exp.Expression]]: 889 """ 890 Parses a list of tokens into a given Expression type. If a collection of Expression 891 types is given instead, this method will try to parse the token list into each one 892 of them, stopping at the first for which the parsing succeeds. 893 894 Args: 895 expression_types: The expression type(s) to try and parse the token list into. 896 raw_tokens: The list of tokens. 897 sql: The original SQL string, used to produce helpful debug messages. 898 899 Returns: 900 The target Expression. 901 """ 902 errors = [] 903 for expression_type in ensure_list(expression_types): 904 parser = self.EXPRESSION_PARSERS.get(expression_type) 905 if not parser: 906 raise TypeError(f"No parser registered for {expression_type}") 907 908 try: 909 return self._parse(parser, raw_tokens, sql) 910 except ParseError as e: 911 e.errors[0]["into_expression"] = expression_type 912 errors.append(e) 913 914 raise ParseError( 915 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 916 errors=merge_errors(errors), 917 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
954 def check_errors(self) -> None: 955 """Logs or raises any found errors, depending on the chosen error level setting.""" 956 if self.error_level == ErrorLevel.WARN: 957 for error in self.errors: 958 logger.error(str(error)) 959 elif self.error_level == ErrorLevel.RAISE and self.errors: 960 raise ParseError( 961 concat_messages(self.errors, self.max_errors), 962 errors=merge_errors(self.errors), 963 )
Logs or raises any found errors, depending on the chosen error level setting.
965 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 966 """ 967 Appends an error in the list of recorded errors or raises it, depending on the chosen 968 error level setting. 969 """ 970 token = token or self._curr or self._prev or Token.string("") 971 start = token.start 972 end = token.end + 1 973 start_context = self.sql[max(start - self.error_message_context, 0) : start] 974 highlight = self.sql[start:end] 975 end_context = self.sql[end : end + self.error_message_context] 976 977 error = ParseError.new( 978 f"{message}. Line {token.line}, Col: {token.col}.\n" 979 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 980 description=message, 981 line=token.line, 982 col=token.col, 983 start_context=start_context, 984 highlight=highlight, 985 end_context=end_context, 986 ) 987 988 if self.error_level == ErrorLevel.IMMEDIATE: 989 raise error 990 991 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
993 def expression( 994 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 995 ) -> E: 996 """ 997 Creates a new, validated Expression. 998 999 Args: 1000 exp_class: The expression class to instantiate. 1001 comments: An optional list of comments to attach to the expression. 1002 kwargs: The arguments to set for the expression along with their respective values. 1003 1004 Returns: 1005 The target expression. 1006 """ 1007 instance = exp_class(**kwargs) 1008 instance.add_comments(comments) if comments else self._add_comments(instance) 1009 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1016 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1017 """ 1018 Validates an Expression, making sure that all its mandatory arguments are set. 1019 1020 Args: 1021 expression: The expression to validate. 1022 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1023 1024 Returns: 1025 The validated expression. 1026 """ 1027 if self.error_level != ErrorLevel.IGNORE: 1028 for error_message in expression.error_messages(args): 1029 self.raise_error(error_message) 1030 1031 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.