# sqlglot.parser — module-path header left over from extraction; kept as a comment so it is inert.
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.IPADDRESS, 189 TokenType.IPPREFIX, 190 TokenType.ENUM, 191 *NESTED_TYPE_TOKENS, 192 } 193 194 SUBQUERY_PREDICATES = { 195 TokenType.ANY: exp.Any, 196 TokenType.ALL: exp.All, 197 TokenType.EXISTS: exp.Exists, 198 TokenType.SOME: exp.Any, 199 } 200 201 RESERVED_KEYWORDS = { 202 *Tokenizer.SINGLE_TOKENS.values(), 203 TokenType.SELECT, 204 } 205 206 DB_CREATABLES = { 207 TokenType.DATABASE, 208 TokenType.SCHEMA, 209 TokenType.TABLE, 210 TokenType.VIEW, 211 TokenType.DICTIONARY, 212 } 213 214 CREATABLES = { 215 TokenType.COLUMN, 216 TokenType.FUNCTION, 217 TokenType.INDEX, 218 TokenType.PROCEDURE, 219 *DB_CREATABLES, 220 } 221 222 # Tokens that can represent identifiers 223 ID_VAR_TOKENS 
= { 224 TokenType.VAR, 225 TokenType.ANTI, 226 TokenType.APPLY, 227 TokenType.ASC, 228 TokenType.AUTO_INCREMENT, 229 TokenType.BEGIN, 230 TokenType.CACHE, 231 TokenType.CASE, 232 TokenType.COLLATE, 233 TokenType.COMMAND, 234 TokenType.COMMENT, 235 TokenType.COMMIT, 236 TokenType.CONSTRAINT, 237 TokenType.DEFAULT, 238 TokenType.DELETE, 239 TokenType.DESC, 240 TokenType.DESCRIBE, 241 TokenType.DICTIONARY, 242 TokenType.DIV, 243 TokenType.END, 244 TokenType.EXECUTE, 245 TokenType.ESCAPE, 246 TokenType.FALSE, 247 TokenType.FIRST, 248 TokenType.FILTER, 249 TokenType.FORMAT, 250 TokenType.FULL, 251 TokenType.IF, 252 TokenType.IS, 253 TokenType.ISNULL, 254 TokenType.INTERVAL, 255 TokenType.KEEP, 256 TokenType.LEFT, 257 TokenType.LOAD, 258 TokenType.MERGE, 259 TokenType.NATURAL, 260 TokenType.NEXT, 261 TokenType.OFFSET, 262 TokenType.ORDINALITY, 263 TokenType.OVERWRITE, 264 TokenType.PARTITION, 265 TokenType.PERCENT, 266 TokenType.PIVOT, 267 TokenType.PRAGMA, 268 TokenType.RANGE, 269 TokenType.REFERENCES, 270 TokenType.RIGHT, 271 TokenType.ROW, 272 TokenType.ROWS, 273 TokenType.SEMI, 274 TokenType.SET, 275 TokenType.SETTINGS, 276 TokenType.SHOW, 277 TokenType.TEMPORARY, 278 TokenType.TOP, 279 TokenType.TRUE, 280 TokenType.UNIQUE, 281 TokenType.UNPIVOT, 282 TokenType.UPDATE, 283 TokenType.VOLATILE, 284 TokenType.WINDOW, 285 *CREATABLES, 286 *SUBQUERY_PREDICATES, 287 *TYPE_TOKENS, 288 *NO_PAREN_FUNCTIONS, 289 } 290 291 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 292 293 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 294 TokenType.APPLY, 295 TokenType.ASOF, 296 TokenType.FULL, 297 TokenType.LEFT, 298 TokenType.LOCK, 299 TokenType.NATURAL, 300 TokenType.OFFSET, 301 TokenType.RIGHT, 302 TokenType.WINDOW, 303 } 304 305 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 306 307 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 308 309 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 310 311 FUNC_TOKENS = { 312 TokenType.COMMAND, 313 TokenType.CURRENT_DATE, 314 
TokenType.CURRENT_DATETIME, 315 TokenType.CURRENT_TIMESTAMP, 316 TokenType.CURRENT_TIME, 317 TokenType.CURRENT_USER, 318 TokenType.FILTER, 319 TokenType.FIRST, 320 TokenType.FORMAT, 321 TokenType.GLOB, 322 TokenType.IDENTIFIER, 323 TokenType.INDEX, 324 TokenType.ISNULL, 325 TokenType.ILIKE, 326 TokenType.LIKE, 327 TokenType.MERGE, 328 TokenType.OFFSET, 329 TokenType.PRIMARY_KEY, 330 TokenType.RANGE, 331 TokenType.REPLACE, 332 TokenType.RLIKE, 333 TokenType.ROW, 334 TokenType.UNNEST, 335 TokenType.VAR, 336 TokenType.LEFT, 337 TokenType.RIGHT, 338 TokenType.DATE, 339 TokenType.DATETIME, 340 TokenType.TABLE, 341 TokenType.TIMESTAMP, 342 TokenType.TIMESTAMPTZ, 343 TokenType.WINDOW, 344 TokenType.XOR, 345 *TYPE_TOKENS, 346 *SUBQUERY_PREDICATES, 347 } 348 349 CONJUNCTION = { 350 TokenType.AND: exp.And, 351 TokenType.OR: exp.Or, 352 } 353 354 EQUALITY = { 355 TokenType.EQ: exp.EQ, 356 TokenType.NEQ: exp.NEQ, 357 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 358 } 359 360 COMPARISON = { 361 TokenType.GT: exp.GT, 362 TokenType.GTE: exp.GTE, 363 TokenType.LT: exp.LT, 364 TokenType.LTE: exp.LTE, 365 } 366 367 BITWISE = { 368 TokenType.AMP: exp.BitwiseAnd, 369 TokenType.CARET: exp.BitwiseXor, 370 TokenType.PIPE: exp.BitwiseOr, 371 TokenType.DPIPE: exp.DPipe, 372 } 373 374 TERM = { 375 TokenType.DASH: exp.Sub, 376 TokenType.PLUS: exp.Add, 377 TokenType.MOD: exp.Mod, 378 TokenType.COLLATE: exp.Collate, 379 } 380 381 FACTOR = { 382 TokenType.DIV: exp.IntDiv, 383 TokenType.LR_ARROW: exp.Distance, 384 TokenType.SLASH: exp.Div, 385 TokenType.STAR: exp.Mul, 386 } 387 388 TIMESTAMPS = { 389 TokenType.TIME, 390 TokenType.TIMESTAMP, 391 TokenType.TIMESTAMPTZ, 392 TokenType.TIMESTAMPLTZ, 393 } 394 395 SET_OPERATIONS = { 396 TokenType.UNION, 397 TokenType.INTERSECT, 398 TokenType.EXCEPT, 399 } 400 401 JOIN_METHODS = { 402 TokenType.NATURAL, 403 TokenType.ASOF, 404 } 405 406 JOIN_SIDES = { 407 TokenType.LEFT, 408 TokenType.RIGHT, 409 TokenType.FULL, 410 } 411 412 JOIN_KINDS = { 413 
TokenType.INNER, 414 TokenType.OUTER, 415 TokenType.CROSS, 416 TokenType.SEMI, 417 TokenType.ANTI, 418 } 419 420 JOIN_HINTS: t.Set[str] = set() 421 422 LAMBDAS = { 423 TokenType.ARROW: lambda self, expressions: self.expression( 424 exp.Lambda, 425 this=self._replace_lambda( 426 self._parse_conjunction(), 427 {node.name for node in expressions}, 428 ), 429 expressions=expressions, 430 ), 431 TokenType.FARROW: lambda self, expressions: self.expression( 432 exp.Kwarg, 433 this=exp.var(expressions[0].name), 434 expression=self._parse_conjunction(), 435 ), 436 } 437 438 COLUMN_OPERATORS = { 439 TokenType.DOT: None, 440 TokenType.DCOLON: lambda self, this, to: self.expression( 441 exp.Cast if self.STRICT_CAST else exp.TryCast, 442 this=this, 443 to=to, 444 ), 445 TokenType.ARROW: lambda self, this, path: self.expression( 446 exp.JSONExtract, 447 this=this, 448 expression=path, 449 ), 450 TokenType.DARROW: lambda self, this, path: self.expression( 451 exp.JSONExtractScalar, 452 this=this, 453 expression=path, 454 ), 455 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 456 exp.JSONBExtract, 457 this=this, 458 expression=path, 459 ), 460 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 461 exp.JSONBExtractScalar, 462 this=this, 463 expression=path, 464 ), 465 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 466 exp.JSONBContains, 467 this=this, 468 expression=key, 469 ), 470 } 471 472 EXPRESSION_PARSERS = { 473 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 474 exp.Column: lambda self: self._parse_column(), 475 exp.Condition: lambda self: self._parse_conjunction(), 476 exp.DataType: lambda self: self._parse_types(), 477 exp.Expression: lambda self: self._parse_statement(), 478 exp.From: lambda self: self._parse_from(), 479 exp.Group: lambda self: self._parse_group(), 480 exp.Having: lambda self: self._parse_having(), 481 exp.Identifier: lambda self: self._parse_id_var(), 482 exp.Join: lambda self: 
self._parse_join(), 483 exp.Lambda: lambda self: self._parse_lambda(), 484 exp.Lateral: lambda self: self._parse_lateral(), 485 exp.Limit: lambda self: self._parse_limit(), 486 exp.Offset: lambda self: self._parse_offset(), 487 exp.Order: lambda self: self._parse_order(), 488 exp.Ordered: lambda self: self._parse_ordered(), 489 exp.Properties: lambda self: self._parse_properties(), 490 exp.Qualify: lambda self: self._parse_qualify(), 491 exp.Returning: lambda self: self._parse_returning(), 492 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 493 exp.Table: lambda self: self._parse_table_parts(), 494 exp.TableAlias: lambda self: self._parse_table_alias(), 495 exp.Where: lambda self: self._parse_where(), 496 exp.Window: lambda self: self._parse_named_window(), 497 exp.With: lambda self: self._parse_with(), 498 "JOIN_TYPE": lambda self: self._parse_join_parts(), 499 } 500 501 STATEMENT_PARSERS = { 502 TokenType.ALTER: lambda self: self._parse_alter(), 503 TokenType.BEGIN: lambda self: self._parse_transaction(), 504 TokenType.CACHE: lambda self: self._parse_cache(), 505 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 506 TokenType.COMMENT: lambda self: self._parse_comment(), 507 TokenType.CREATE: lambda self: self._parse_create(), 508 TokenType.DELETE: lambda self: self._parse_delete(), 509 TokenType.DESC: lambda self: self._parse_describe(), 510 TokenType.DESCRIBE: lambda self: self._parse_describe(), 511 TokenType.DROP: lambda self: self._parse_drop(), 512 TokenType.FROM: lambda self: exp.select("*").from_( 513 t.cast(exp.From, self._parse_from(skip_from_token=True)) 514 ), 515 TokenType.INSERT: lambda self: self._parse_insert(), 516 TokenType.LOAD: lambda self: self._parse_load(), 517 TokenType.MERGE: lambda self: self._parse_merge(), 518 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 519 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 520 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 521 TokenType.SET: lambda self: self._parse_set(), 522 TokenType.UNCACHE: lambda self: self._parse_uncache(), 523 TokenType.UPDATE: lambda self: self._parse_update(), 524 TokenType.USE: lambda self: self.expression( 525 exp.Use, 526 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 527 and exp.var(self._prev.text), 528 this=self._parse_table(schema=False), 529 ), 530 } 531 532 UNARY_PARSERS = { 533 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 534 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 535 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 536 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 537 } 538 539 PRIMARY_PARSERS = { 540 TokenType.STRING: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=True 542 ), 543 TokenType.NUMBER: lambda self, token: self.expression( 544 exp.Literal, this=token.text, is_string=False 545 ), 546 TokenType.STAR: lambda self, _: self.expression( 547 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 548 ), 549 TokenType.NULL: lambda self, _: self.expression(exp.Null), 550 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 551 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 552 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 553 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 554 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 555 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 556 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 557 exp.National, this=token.text 558 ), 559 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 560 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 561 } 562 563 PLACEHOLDER_PARSERS = { 564 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 565 TokenType.PARAMETER: lambda self: self._parse_parameter(), 566 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 567 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 568 else None, 569 } 570 571 RANGE_PARSERS = { 572 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 573 TokenType.GLOB: binary_range_parser(exp.Glob), 574 TokenType.ILIKE: binary_range_parser(exp.ILike), 575 TokenType.IN: lambda self, this: self._parse_in(this), 576 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 577 TokenType.IS: lambda self, this: self._parse_is(this), 578 TokenType.LIKE: binary_range_parser(exp.Like), 579 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 580 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 581 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 582 } 583 584 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 585 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 586 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 587 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 588 "CHARACTER SET": lambda self: self._parse_character_set(), 589 "CHECKSUM": lambda self: self._parse_checksum(), 590 "CLUSTER BY": lambda self: self._parse_cluster(), 591 "CLUSTERED": lambda self: self._parse_clustered_by(), 592 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 593 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 594 "COPY": lambda self: self._parse_copy_property(), 595 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 596 "DEFINER": lambda self: self._parse_definer(), 597 "DETERMINISTIC": lambda self: self.expression( 598 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 599 ), 600 "DISTKEY": lambda self: self._parse_distkey(), 601 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 602 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 603 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 604 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 605 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 606 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 607 "FREESPACE": lambda self: self._parse_freespace(), 608 "HEAP": lambda self: self.expression(exp.HeapProperty), 609 "IMMUTABLE": lambda self: self.expression( 610 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 611 ), 612 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 613 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 614 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 615 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 616 "LIKE": lambda self: self._parse_create_like(), 617 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 618 "LOCK": lambda self: self._parse_locking(), 619 "LOCKING": lambda self: self._parse_locking(), 620 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 621 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 622 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 623 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 624 "NO": lambda self: self._parse_no_property(), 625 "ON": lambda self: self._parse_on_property(), 626 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 627 "PARTITION BY": lambda self: self._parse_partitioned_by(), 628 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 629 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 630 
"PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 631 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 632 "RETURNS": lambda self: self._parse_returns(), 633 "ROW": lambda self: self._parse_row(), 634 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 635 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 636 "SETTINGS": lambda self: self.expression( 637 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 638 ), 639 "SORTKEY": lambda self: self._parse_sortkey(), 640 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 641 "STABLE": lambda self: self.expression( 642 exp.StabilityProperty, this=exp.Literal.string("STABLE") 643 ), 644 "STORED": lambda self: self._parse_stored(), 645 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 646 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 647 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 648 "TO": lambda self: self._parse_to_table(), 649 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 650 "TTL": lambda self: self._parse_ttl(), 651 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 652 "VOLATILE": lambda self: self._parse_volatile_property(), 653 "WITH": lambda self: self._parse_with_property(), 654 } 655 656 CONSTRAINT_PARSERS = { 657 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 658 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 659 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 660 "CHARACTER SET": lambda self: self.expression( 661 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 662 ), 663 "CHECK": lambda self: self.expression( 664 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 665 ), 666 "COLLATE": lambda self: self.expression( 667 exp.CollateColumnConstraint, this=self._parse_var() 668 ), 
669 "COMMENT": lambda self: self.expression( 670 exp.CommentColumnConstraint, this=self._parse_string() 671 ), 672 "COMPRESS": lambda self: self._parse_compress(), 673 "DEFAULT": lambda self: self.expression( 674 exp.DefaultColumnConstraint, this=self._parse_bitwise() 675 ), 676 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 677 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 678 "FORMAT": lambda self: self.expression( 679 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 680 ), 681 "GENERATED": lambda self: self._parse_generated_as_identity(), 682 "IDENTITY": lambda self: self._parse_auto_increment(), 683 "INLINE": lambda self: self._parse_inline(), 684 "LIKE": lambda self: self._parse_create_like(), 685 "NOT": lambda self: self._parse_not_constraint(), 686 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 687 "ON": lambda self: self._match(TokenType.UPDATE) 688 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 689 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 690 "PRIMARY KEY": lambda self: self._parse_primary_key(), 691 "REFERENCES": lambda self: self._parse_references(match=False), 692 "TITLE": lambda self: self.expression( 693 exp.TitleColumnConstraint, this=self._parse_var_or_string() 694 ), 695 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 696 "UNIQUE": lambda self: self._parse_unique(), 697 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 698 } 699 700 ALTER_PARSERS = { 701 "ADD": lambda self: self._parse_alter_table_add(), 702 "ALTER": lambda self: self._parse_alter_table_alter(), 703 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 704 "DROP": lambda self: self._parse_alter_table_drop(), 705 "RENAME": lambda self: self._parse_alter_table_rename(), 706 } 707 708 
SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 709 710 NO_PAREN_FUNCTION_PARSERS = { 711 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 712 TokenType.CASE: lambda self: self._parse_case(), 713 TokenType.IF: lambda self: self._parse_if(), 714 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 715 exp.NextValueFor, 716 this=self._parse_column(), 717 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 718 ), 719 } 720 721 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 722 723 FUNCTION_PARSERS = { 724 "ANY_VALUE": lambda self: self._parse_any_value(), 725 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 726 "CONCAT": lambda self: self._parse_concat(), 727 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 728 "DECODE": lambda self: self._parse_decode(), 729 "EXTRACT": lambda self: self._parse_extract(), 730 "JSON_OBJECT": lambda self: self._parse_json_object(), 731 "LOG": lambda self: self._parse_logarithm(), 732 "MATCH": lambda self: self._parse_match_against(), 733 "OPENJSON": lambda self: self._parse_open_json(), 734 "POSITION": lambda self: self._parse_position(), 735 "SAFE_CAST": lambda self: self._parse_cast(False), 736 "STRING_AGG": lambda self: self._parse_string_agg(), 737 "SUBSTRING": lambda self: self._parse_substring(), 738 "TRIM": lambda self: self._parse_trim(), 739 "TRY_CAST": lambda self: self._parse_cast(False), 740 "TRY_CONVERT": lambda self: self._parse_convert(False), 741 } 742 743 QUERY_MODIFIER_PARSERS = { 744 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 745 TokenType.WHERE: lambda self: ("where", self._parse_where()), 746 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 747 TokenType.HAVING: lambda self: ("having", self._parse_having()), 748 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 749 TokenType.WINDOW: lambda self: ("windows", 
self._parse_window_clause()), 750 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 751 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 752 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 753 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 754 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 755 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 756 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 757 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 758 TokenType.CLUSTER_BY: lambda self: ( 759 "cluster", 760 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 761 ), 762 TokenType.DISTRIBUTE_BY: lambda self: ( 763 "distribute", 764 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 765 ), 766 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 767 } 768 769 SET_PARSERS = { 770 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 771 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 772 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 773 "TRANSACTION": lambda self: self._parse_set_transaction(), 774 } 775 776 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 777 778 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 779 780 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 781 782 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 783 784 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 785 786 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 787 TRANSACTION_CHARACTERISTICS = { 788 "ISOLATION LEVEL REPEATABLE READ", 789 "ISOLATION LEVEL READ COMMITTED", 790 "ISOLATION LEVEL READ UNCOMMITTED", 791 "ISOLATION LEVEL SERIALIZABLE", 792 "READ WRITE", 793 "READ ONLY", 794 } 795 796 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", 
"ROLLBACK"} 797 798 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 799 800 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 801 802 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 803 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 804 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 805 806 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 807 808 STRICT_CAST = True 809 810 # A NULL arg in CONCAT yields NULL by default 811 CONCAT_NULL_OUTPUTS_STRING = False 812 813 PREFIXED_PIVOT_COLUMNS = False 814 IDENTIFY_PIVOT_STRINGS = False 815 816 LOG_BASE_FIRST = True 817 LOG_DEFAULTS_TO_LN = False 818 819 __slots__ = ( 820 "error_level", 821 "error_message_context", 822 "max_errors", 823 "sql", 824 "errors", 825 "_tokens", 826 "_index", 827 "_curr", 828 "_next", 829 "_prev", 830 "_prev_comments", 831 ) 832 833 # Autofilled 834 INDEX_OFFSET: int = 0 835 UNNEST_COLUMN_ONLY: bool = False 836 ALIAS_POST_TABLESAMPLE: bool = False 837 STRICT_STRING_CONCAT = False 838 NORMALIZE_FUNCTIONS = "upper" 839 NULL_ORDERING: str = "nulls_are_small" 840 SHOW_TRIE: t.Dict = {} 841 SET_TRIE: t.Dict = {} 842 FORMAT_MAPPING: t.Dict[str, str] = {} 843 FORMAT_TRIE: t.Dict = {} 844 TIME_MAPPING: t.Dict[str, str] = {} 845 TIME_TRIE: t.Dict = {} 846 847 def __init__( 848 self, 849 error_level: t.Optional[ErrorLevel] = None, 850 error_message_context: int = 100, 851 max_errors: int = 3, 852 ): 853 self.error_level = error_level or ErrorLevel.IMMEDIATE 854 self.error_message_context = error_message_context 855 self.max_errors = max_errors 856 self.reset() 857 858 def reset(self): 859 self.sql = "" 860 self.errors = [] 861 self._tokens = [] 862 self._index = 0 863 self._curr = None 864 self._next = None 865 self._prev = None 866 self._prev_comments = None 867 868 def parse( 869 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 870 ) -> t.List[t.Optional[exp.Expression]]: 871 """ 872 Parses a list of tokens and returns 
a list of syntax trees, one tree 873 per parsed SQL statement. 874 875 Args: 876 raw_tokens: The list of tokens. 877 sql: The original SQL string, used to produce helpful debug messages. 878 879 Returns: 880 The list of the produced syntax trees. 881 """ 882 return self._parse( 883 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 884 ) 885 886 def parse_into( 887 self, 888 expression_types: exp.IntoType, 889 raw_tokens: t.List[Token], 890 sql: t.Optional[str] = None, 891 ) -> t.List[t.Optional[exp.Expression]]: 892 """ 893 Parses a list of tokens into a given Expression type. If a collection of Expression 894 types is given instead, this method will try to parse the token list into each one 895 of them, stopping at the first for which the parsing succeeds. 896 897 Args: 898 expression_types: The expression type(s) to try and parse the token list into. 899 raw_tokens: The list of tokens. 900 sql: The original SQL string, used to produce helpful debug messages. 901 902 Returns: 903 The target Expression. 
904 """ 905 errors = [] 906 for expression_type in ensure_list(expression_types): 907 parser = self.EXPRESSION_PARSERS.get(expression_type) 908 if not parser: 909 raise TypeError(f"No parser registered for {expression_type}") 910 911 try: 912 return self._parse(parser, raw_tokens, sql) 913 except ParseError as e: 914 e.errors[0]["into_expression"] = expression_type 915 errors.append(e) 916 917 raise ParseError( 918 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 919 errors=merge_errors(errors), 920 ) from errors[-1] 921 922 def _parse( 923 self, 924 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 925 raw_tokens: t.List[Token], 926 sql: t.Optional[str] = None, 927 ) -> t.List[t.Optional[exp.Expression]]: 928 self.reset() 929 self.sql = sql or "" 930 931 total = len(raw_tokens) 932 chunks: t.List[t.List[Token]] = [[]] 933 934 for i, token in enumerate(raw_tokens): 935 if token.token_type == TokenType.SEMICOLON: 936 if i < total - 1: 937 chunks.append([]) 938 else: 939 chunks[-1].append(token) 940 941 expressions = [] 942 943 for tokens in chunks: 944 self._index = -1 945 self._tokens = tokens 946 self._advance() 947 948 expressions.append(parse_method(self)) 949 950 if self._index < len(self._tokens): 951 self.raise_error("Invalid expression / Unexpected token") 952 953 self.check_errors() 954 955 return expressions 956 957 def check_errors(self) -> None: 958 """Logs or raises any found errors, depending on the chosen error level setting.""" 959 if self.error_level == ErrorLevel.WARN: 960 for error in self.errors: 961 logger.error(str(error)) 962 elif self.error_level == ErrorLevel.RAISE and self.errors: 963 raise ParseError( 964 concat_messages(self.errors, self.max_errors), 965 errors=merge_errors(self.errors), 966 ) 967 968 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 969 """ 970 Appends an error in the list of recorded errors or raises it, depending on the chosen 971 error level setting. 
972 """ 973 token = token or self._curr or self._prev or Token.string("") 974 start = token.start 975 end = token.end + 1 976 start_context = self.sql[max(start - self.error_message_context, 0) : start] 977 highlight = self.sql[start:end] 978 end_context = self.sql[end : end + self.error_message_context] 979 980 error = ParseError.new( 981 f"{message}. Line {token.line}, Col: {token.col}.\n" 982 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 983 description=message, 984 line=token.line, 985 col=token.col, 986 start_context=start_context, 987 highlight=highlight, 988 end_context=end_context, 989 ) 990 991 if self.error_level == ErrorLevel.IMMEDIATE: 992 raise error 993 994 self.errors.append(error) 995 996 def expression( 997 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 998 ) -> E: 999 """ 1000 Creates a new, validated Expression. 1001 1002 Args: 1003 exp_class: The expression class to instantiate. 1004 comments: An optional list of comments to attach to the expression. 1005 kwargs: The arguments to set for the expression along with their respective values. 1006 1007 Returns: 1008 The target expression. 1009 """ 1010 instance = exp_class(**kwargs) 1011 instance.add_comments(comments) if comments else self._add_comments(instance) 1012 return self.validate_expression(instance) 1013 1014 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1015 if expression and self._prev_comments: 1016 expression.add_comments(self._prev_comments) 1017 self._prev_comments = None 1018 1019 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1020 """ 1021 Validates an Expression, making sure that all its mandatory arguments are set. 1022 1023 Args: 1024 expression: The expression to validate. 1025 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1026 1027 Returns: 1028 The validated expression. 
1029 """ 1030 if self.error_level != ErrorLevel.IGNORE: 1031 for error_message in expression.error_messages(args): 1032 self.raise_error(error_message) 1033 1034 return expression 1035 1036 def _find_sql(self, start: Token, end: Token) -> str: 1037 return self.sql[start.start : end.end + 1] 1038 1039 def _advance(self, times: int = 1) -> None: 1040 self._index += times 1041 self._curr = seq_get(self._tokens, self._index) 1042 self._next = seq_get(self._tokens, self._index + 1) 1043 1044 if self._index > 0: 1045 self._prev = self._tokens[self._index - 1] 1046 self._prev_comments = self._prev.comments 1047 else: 1048 self._prev = None 1049 self._prev_comments = None 1050 1051 def _retreat(self, index: int) -> None: 1052 if index != self._index: 1053 self._advance(index - self._index) 1054 1055 def _parse_command(self) -> exp.Command: 1056 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1057 1058 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1059 start = self._prev 1060 exists = self._parse_exists() if allow_exists else None 1061 1062 self._match(TokenType.ON) 1063 1064 kind = self._match_set(self.CREATABLES) and self._prev 1065 if not kind: 1066 return self._parse_as_command(start) 1067 1068 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1069 this = self._parse_user_defined_function(kind=kind.token_type) 1070 elif kind.token_type == TokenType.TABLE: 1071 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1072 elif kind.token_type == TokenType.COLUMN: 1073 this = self._parse_column() 1074 else: 1075 this = self._parse_id_var() 1076 1077 self._match(TokenType.IS) 1078 1079 return self.expression( 1080 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1081 ) 1082 1083 def _parse_to_table( 1084 self, 1085 ) -> exp.ToTableProperty: 1086 table = self._parse_table_parts(schema=True) 1087 return self.expression(exp.ToTableProperty, 
            this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise it's either a bare expression or a SELECT-like query.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target - degrade to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates property expressions parsed at different clause positions.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
clone = self._parse_table(schema=True) 1267 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1268 clone_kind = ( 1269 self._match(TokenType.L_PAREN) 1270 and self._match_texts(self.CLONE_KINDS) 1271 and self._prev.text.upper() 1272 ) 1273 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1274 self._match(TokenType.R_PAREN) 1275 clone = self.expression( 1276 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1277 ) 1278 1279 return self.expression( 1280 exp.Create, 1281 this=this, 1282 kind=create_token.text, 1283 replace=replace, 1284 unique=unique, 1285 expression=expression, 1286 exists=exists, 1287 properties=properties, 1288 indexes=indexes, 1289 no_schema_binding=no_schema_binding, 1290 begin=begin, 1291 clone=clone, 1292 ) 1293 1294 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1295 # only used for teradata currently 1296 self._match(TokenType.COMMA) 1297 1298 kwargs = { 1299 "no": self._match_text_seq("NO"), 1300 "dual": self._match_text_seq("DUAL"), 1301 "before": self._match_text_seq("BEFORE"), 1302 "default": self._match_text_seq("DEFAULT"), 1303 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1304 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1305 "after": self._match_text_seq("AFTER"), 1306 "minimum": self._match_texts(("MIN", "MINIMUM")), 1307 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1308 } 1309 1310 if self._match_texts(self.PROPERTY_PARSERS): 1311 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1312 try: 1313 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1314 except TypeError: 1315 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1316 1317 return None 1318 1319 def _parse_property(self) -> t.Optional[exp.Expression]: 1320 if self._match_texts(self.PROPERTY_PARSERS): 1321 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1322 1323 if self._match_pair(TokenType.DEFAULT, 
                            TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (identifier or string key).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED AS <format> | STORED AS INPUTFORMAT '...' OUTPUTFORMAT '...'
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # <prop> [= | AS] <field>
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects properties until none match; `before` selects the Teradata-style
        # pre-name property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE [OR REPLACE] is a table property; elsewhere
        # it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # Tri-state: True (ON), False (OFF), or None when unspecified.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS - give the COPY token back.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
percent=self._match(TokenType.PERCENT), 1500 ) 1501 1502 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1503 1504 def _parse_datablocksize( 1505 self, 1506 default: t.Optional[bool] = None, 1507 minimum: t.Optional[bool] = None, 1508 maximum: t.Optional[bool] = None, 1509 ) -> exp.DataBlocksizeProperty: 1510 self._match(TokenType.EQ) 1511 size = self._parse_number() 1512 1513 units = None 1514 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1515 units = self._prev.text 1516 1517 return self.expression( 1518 exp.DataBlocksizeProperty, 1519 size=size, 1520 units=units, 1521 default=default, 1522 minimum=minimum, 1523 maximum=maximum, 1524 ) 1525 1526 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1527 self._match(TokenType.EQ) 1528 always = self._match_text_seq("ALWAYS") 1529 manual = self._match_text_seq("MANUAL") 1530 never = self._match_text_seq("NEVER") 1531 default = self._match_text_seq("DEFAULT") 1532 1533 autotemp = None 1534 if self._match_text_seq("AUTOTEMP"): 1535 autotemp = self._parse_schema() 1536 1537 return self.expression( 1538 exp.BlockCompressionProperty, 1539 always=always, 1540 manual=manual, 1541 never=never, 1542 default=default, 1543 autotemp=autotemp, 1544 ) 1545 1546 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1547 no = self._match_text_seq("NO") 1548 concurrent = self._match_text_seq("CONCURRENT") 1549 self._match_text_seq("ISOLATED", "LOADING") 1550 for_all = self._match_text_seq("FOR", "ALL") 1551 for_insert = self._match_text_seq("FOR", "INSERT") 1552 for_none = self._match_text_seq("FOR", "NONE") 1553 return self.expression( 1554 exp.IsolatedLoadingProperty, 1555 no=no, 1556 concurrent=concurrent, 1557 for_all=for_all, 1558 for_insert=for_insert, 1559 for_none=for_none, 1560 ) 1561 1562 def _parse_locking(self) -> exp.LockingProperty: 1563 if self._match(TokenType.TABLE): 1564 kind = "TABLE" 1565 elif self._match(TokenType.VIEW): 1566 kind = "VIEW" 1567 
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW-level locks do not name a target object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Tri-state statistics flag: AND [NO] STATISTICS, or absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [{INCLUDING | EXCLUDING} <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> | RETURNS TABLE [<schema> | <struct-types>]
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT [OVERWRITE] [LOCAL] DIRECTORY ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Postgres ON CONFLICT ... / MySQL ON DUPLICATE KEY ...
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive ROW FORMAT SERDE '...' | ROW FORMAT DELIMITED ...
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            # Hive LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE ...
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
limit=self._parse_limit(), 1869 ) 1870 1871 def _parse_update(self) -> exp.Update: 1872 comments = self._prev_comments 1873 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1874 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1875 returning = self._parse_returning() 1876 return self.expression( 1877 exp.Update, 1878 comments=comments, 1879 **{ # type: ignore 1880 "this": this, 1881 "expressions": expressions, 1882 "from": self._parse_from(joins=True), 1883 "where": self._parse_where(), 1884 "returning": returning or self._parse_returning(), 1885 "limit": self._parse_limit(), 1886 }, 1887 ) 1888 1889 def _parse_uncache(self) -> exp.Uncache: 1890 if not self._match(TokenType.TABLE): 1891 self.raise_error("Expecting TABLE after UNCACHE") 1892 1893 return self.expression( 1894 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1895 ) 1896 1897 def _parse_cache(self) -> exp.Cache: 1898 lazy = self._match_text_seq("LAZY") 1899 self._match(TokenType.TABLE) 1900 table = self._parse_table(schema=True) 1901 1902 options = [] 1903 if self._match_text_seq("OPTIONS"): 1904 self._match_l_paren() 1905 k = self._parse_string() 1906 self._match(TokenType.EQ) 1907 v = self._parse_string() 1908 options = [k, v] 1909 self._match_r_paren() 1910 1911 self._match(TokenType.ALIAS) 1912 return self.expression( 1913 exp.Cache, 1914 this=table, 1915 lazy=lazy, 1916 options=options, 1917 expression=self._parse_select(nested=True), 1918 ) 1919 1920 def _parse_partition(self) -> t.Optional[exp.Partition]: 1921 if not self._match(TokenType.PARTITION): 1922 return None 1923 1924 return self.expression( 1925 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1926 ) 1927 1928 def _parse_value(self) -> exp.Tuple: 1929 if self._match(TokenType.L_PAREN): 1930 expressions = self._parse_csv(self._parse_conjunction) 1931 self._match_r_paren() 1932 return self.expression(exp.Tuple, 
expressions=expressions) 1933 1934 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1935 # https://prestodb.io/docs/current/sql/values.html 1936 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1937 1938 def _parse_select( 1939 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1940 ) -> t.Optional[exp.Expression]: 1941 cte = self._parse_with() 1942 if cte: 1943 this = self._parse_statement() 1944 1945 if not this: 1946 self.raise_error("Failed to parse any statement following CTE") 1947 return cte 1948 1949 if "with" in this.arg_types: 1950 this.set("with", cte) 1951 else: 1952 self.raise_error(f"{this.key} does not support CTE") 1953 this = cte 1954 elif self._match(TokenType.SELECT): 1955 comments = self._prev_comments 1956 1957 hint = self._parse_hint() 1958 all_ = self._match(TokenType.ALL) 1959 distinct = self._match(TokenType.DISTINCT) 1960 1961 kind = ( 1962 self._match(TokenType.ALIAS) 1963 and self._match_texts(("STRUCT", "VALUE")) 1964 and self._prev.text 1965 ) 1966 1967 if distinct: 1968 distinct = self.expression( 1969 exp.Distinct, 1970 on=self._parse_value() if self._match(TokenType.ON) else None, 1971 ) 1972 1973 if all_ and distinct: 1974 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1975 1976 limit = self._parse_limit(top=True) 1977 expressions = self._parse_expressions() 1978 1979 this = self.expression( 1980 exp.Select, 1981 kind=kind, 1982 hint=hint, 1983 distinct=distinct, 1984 expressions=expressions, 1985 limit=limit, 1986 ) 1987 this.comments = comments 1988 1989 into = self._parse_into() 1990 if into: 1991 this.set("into", into) 1992 1993 from_ = self._parse_from() 1994 if from_: 1995 this.set("from", from_) 1996 1997 this = self._parse_query_modifiers(this) 1998 elif (table or nested) and self._match(TokenType.L_PAREN): 1999 if self._match(TokenType.PIVOT): 2000 this = self._parse_simplified_pivot() 2001 elif 
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte ...]; returns None when there is no WITH."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated; a stray repeated WITH between
            # them is tolerated and consumed.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, with an optional column list:
        [AS] name [(col, ...)]. Returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis did not actually open a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap ``this`` in a Subquery node, parsing trailing pivots and
        (optionally) an alias. Returns None when ``this`` is None."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, LIMIT,
        ...) to a modifiable expression; non-modifiables pass through unchanged."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT x, y packs the offset into the Limit node;
                            # lift it out into a proper Offset node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ following SELECT."""
        if self._match(TokenType.HINT):
            hints = []
            # Hints are comma-separated function-like entries until the closing */.
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None
expressions=hints) 2123 2124 return None 2125 2126 def _parse_into(self) -> t.Optional[exp.Into]: 2127 if not self._match(TokenType.INTO): 2128 return None 2129 2130 temp = self._match(TokenType.TEMPORARY) 2131 unlogged = self._match_text_seq("UNLOGGED") 2132 self._match(TokenType.TABLE) 2133 2134 return self.expression( 2135 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2136 ) 2137 2138 def _parse_from( 2139 self, joins: bool = False, skip_from_token: bool = False 2140 ) -> t.Optional[exp.From]: 2141 if not skip_from_token and not self._match(TokenType.FROM): 2142 return None 2143 2144 return self.expression( 2145 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2146 ) 2147 2148 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2149 if not self._match(TokenType.MATCH_RECOGNIZE): 2150 return None 2151 2152 self._match_l_paren() 2153 2154 partition = self._parse_partition_by() 2155 order = self._parse_order() 2156 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2157 2158 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2159 rows = exp.var("ONE ROW PER MATCH") 2160 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2161 text = "ALL ROWS PER MATCH" 2162 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2163 text += f" SHOW EMPTY MATCHES" 2164 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2165 text += f" OMIT EMPTY MATCHES" 2166 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2167 text += f" WITH UNMATCHED ROWS" 2168 rows = exp.var(text) 2169 else: 2170 rows = None 2171 2172 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2173 text = "AFTER MATCH SKIP" 2174 if self._match_text_seq("PAST", "LAST", "ROW"): 2175 text += f" PAST LAST ROW" 2176 elif self._match_text_seq("TO", "NEXT", "ROW"): 2177 text += f" TO NEXT ROW" 2178 elif self._match_text_seq("TO", "FIRST"): 2179 text += f" TO FIRST {self._advance_any().text}" # type: 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW [OUTER]] <expr>, or OUTER/CROSS APPLY.
        Returns None when none of those introducers is present."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: try UNNEST, a function call, or a bare identifier,
            # possibly followed by dotted member/function accesses.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume and return the (method, side, kind) tokens of a join prefix,
        e.g. NATURAL LEFT OUTER; each element is None when absent."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join: comma join, [method/side/kind] JOIN, or OUTER/CROSS
        APPLY, with its ON / USING condition. Returns None when no join follows."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The prefix tokens did not lead to a JOIN keyword: backtrack.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins: the joined table may itself carry joins
            # before this join's ON/USING clause appears.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition or reference.

        When ``index`` is given, parse the trailing ON <table> part; otherwise
        parse [UNIQUE|PRIMARY|AMP] INDEX <name>, then the common USING /
        column-list / PARTITION BY tail.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL WITH (...) hints or MySQL index hints.
        Returns None when no hints are present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table name."""
        return (
            # In schema position a name must not be parsed as a function call.
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table reference [catalog.][db.]table, nesting any
        additional dots beyond three parts into Dot expressions."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, UNNEST, VALUES, subquery, or a plain
        table reference with alias, pivots, hints, sample and optional joins."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect-dependent ordering: the sample clause may come before or
        # after the alias (e.g. Hive vs. most other dialects).
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(<exprs>) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name].
        Returns None when UNNEST is absent."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects (e.g. BigQuery) the alias names the
            # produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default offset column name when no explicit alias is given.
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )
2513 self.raise_error("Unexpected extra column alias in unnest.") 2514 2515 alias.set("columns", [alias.this]) 2516 alias.set("this", None) 2517 2518 offset = None 2519 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2520 self._match(TokenType.ALIAS) 2521 offset = self._parse_id_var() or exp.to_identifier("offset") 2522 2523 return self.expression( 2524 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2525 ) 2526 2527 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2528 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2529 if not is_derived and not self._match(TokenType.VALUES): 2530 return None 2531 2532 expressions = self._parse_csv(self._parse_value) 2533 alias = self._parse_table_alias() 2534 2535 if is_derived: 2536 self._match_r_paren() 2537 2538 return self.expression( 2539 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2540 ) 2541 2542 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2543 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2544 as_modifier and self._match_text_seq("USING", "SAMPLE") 2545 ): 2546 return None 2547 2548 bucket_numerator = None 2549 bucket_denominator = None 2550 bucket_field = None 2551 percent = None 2552 rows = None 2553 size = None 2554 seed = None 2555 2556 kind = ( 2557 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2558 ) 2559 method = self._parse_var(tokens=(TokenType.ROW,)) 2560 2561 self._match(TokenType.L_PAREN) 2562 2563 num = self._parse_number() 2564 2565 if self._match_text_seq("BUCKET"): 2566 bucket_numerator = self._parse_number() 2567 self._match_text_seq("OUT", "OF") 2568 bucket_denominator = bucket_denominator = self._parse_number() 2569 self._match(TokenType.ON) 2570 bucket_field = self._parse_field() 2571 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2572 percent = num 2573 elif 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses; None if none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse zero or more consecutive joins; None if none."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT syntax:
        PIVOT <table> ON <cols> USING <aggs> GROUP BY <cols>."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT (<aggs|cols> FOR <col> IN (...)) clause,
        computing the generated output column names for PIVOT."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by "(" — not a pivot clause; backtrack.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot of a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the pivoted output column names from the IN values
            # combined with the aggregation aliases.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; overridable per dialect."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; returns None when WHERE is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with any mix of plain expressions, GROUPING SETS,
        [WITH] ROLLUP/CUBE and WITH TOTALS; returns None when absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # The grouping constructs may repeat and interleave, so keep looping
        # until a pass matches none of them.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP(...) does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...); returns None when absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; returns None when HAVING is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; returns None when QUALIFY is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())
    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY <ordered>, ...; returns ``this`` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) introduced by
        ``token`` into an ``exp_class`` node; returns None when absent."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: <expr> [ASC|DESC] [NULLS FIRST|LAST],
        applying the dialect's default null ordering when not explicit."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] count, TOP [(n)] n, or FETCH FIRST/NEXT ...;
        returns ``this`` unchanged when none of them is present."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style LIMIT <offset>, <count>
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns ``this`` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, each with optional OF <tables> and a wait policy
        (NOWAIT / WAIT <n> / SKIP LOCKED)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations, recursing so the
        right-hand side may itself chain further set operations."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Bare set operators default to DISTINCT; ALL makes them non-distinct.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a conjunction with an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level expressions (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level expressions (<, >, <=, >=)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: [NOT] BETWEEN/IN/LIKE..., ISNULL,
        NOTNULL and IS, on top of a bitwise-level operand."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2931 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2932 if self._match(TokenType.NOTNULL): 2933 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2934 this = self.expression(exp.Not, this=this) 2935 2936 if negate: 2937 this = self.expression(exp.Not, this=this) 2938 2939 if self._match(TokenType.IS): 2940 this = self._parse_is(this) 2941 2942 return this 2943 2944 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2945 index = self._index - 1 2946 negate = self._match(TokenType.NOT) 2947 2948 if self._match_text_seq("DISTINCT", "FROM"): 2949 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2950 return self.expression(klass, this=this, expression=self._parse_expression()) 2951 2952 expression = self._parse_null() or self._parse_boolean() 2953 if not expression: 2954 self._retreat(index) 2955 return None 2956 2957 this = self.expression(exp.Is, this=this, expression=expression) 2958 return self.expression(exp.Not, this=this) if negate else this 2959 2960 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2961 unnest = self._parse_unnest(with_alias=False) 2962 if unnest: 2963 this = self.expression(exp.In, this=this, unnest=unnest) 2964 elif self._match(TokenType.L_PAREN): 2965 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2966 2967 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2968 this = self.expression(exp.In, this=this, query=expressions[0]) 2969 else: 2970 this = self.expression(exp.In, this=this, expressions=expressions) 2971 2972 self._match_r_paren(this) 2973 else: 2974 this = self.expression(exp.In, this=this, field=self._parse_field()) 2975 2976 return this 2977 2978 def _parse_between(self, this: exp.Expression) -> exp.Between: 2979 low = self._parse_bitwise() 2980 self._match(TokenType.AND) 2981 high = self._parse_bitwise() 2982 return self.expression(exp.Between, this=this, low=low, 
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal; returns None when INTERVAL is absent."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # e.g. INTERVAL '5 day' -> value '5', unit day
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, including << and >> spelled as
        adjacent comparison tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token set)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR token set)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
_parse_factor(self) -> t.Optional[exp.Expression]: 3043 return self._parse_tokens(self._parse_unary, self.FACTOR) 3044 3045 def _parse_unary(self) -> t.Optional[exp.Expression]: 3046 if self._match_set(self.UNARY_PARSERS): 3047 return self.UNARY_PARSERS[self._prev.token_type](self) 3048 return self._parse_at_time_zone(self._parse_type()) 3049 3050 def _parse_type(self) -> t.Optional[exp.Expression]: 3051 interval = self._parse_interval() 3052 if interval: 3053 return interval 3054 3055 index = self._index 3056 data_type = self._parse_types(check_func=True) 3057 this = self._parse_column() 3058 3059 if data_type: 3060 if isinstance(this, exp.Literal): 3061 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3062 if parser: 3063 return parser(self, this, data_type) 3064 return self.expression(exp.Cast, this=this, to=data_type) 3065 if not data_type.expressions: 3066 self._retreat(index) 3067 return self._parse_column() 3068 return self._parse_column_ops(data_type) 3069 3070 return this 3071 3072 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3073 this = self._parse_type() 3074 if not this: 3075 return None 3076 3077 return self.expression( 3078 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3079 ) 3080 3081 def _parse_types( 3082 self, check_func: bool = False, schema: bool = False 3083 ) -> t.Optional[exp.Expression]: 3084 index = self._index 3085 3086 prefix = self._match_text_seq("SYSUDTLIB", ".") 3087 3088 if not self._match_set(self.TYPE_TOKENS): 3089 return None 3090 3091 type_token = self._prev.token_type 3092 3093 if type_token == TokenType.PSEUDO_TYPE: 3094 return self.expression(exp.PseudoType, this=self._prev.text) 3095 3096 nested = type_token in self.NESTED_TYPE_TOKENS 3097 is_struct = type_token == TokenType.STRUCT 3098 expressions = None 3099 maybe_func = False 3100 3101 if self._match(TokenType.L_PAREN): 3102 if is_struct: 3103 expressions = self._parse_csv(self._parse_struct_types) 3104 elif nested: 3105 
expressions = self._parse_csv( 3106 lambda: self._parse_types(check_func=check_func, schema=schema) 3107 ) 3108 elif type_token in self.ENUM_TYPE_TOKENS: 3109 expressions = self._parse_csv(self._parse_primary) 3110 else: 3111 expressions = self._parse_csv(self._parse_type_size) 3112 3113 if not expressions or not self._match(TokenType.R_PAREN): 3114 self._retreat(index) 3115 return None 3116 3117 maybe_func = True 3118 3119 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3120 this = exp.DataType( 3121 this=exp.DataType.Type.ARRAY, 3122 expressions=[ 3123 exp.DataType( 3124 this=exp.DataType.Type[type_token.value], 3125 expressions=expressions, 3126 nested=nested, 3127 ) 3128 ], 3129 nested=True, 3130 ) 3131 3132 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3133 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3134 3135 return this 3136 3137 if self._match(TokenType.L_BRACKET): 3138 self._retreat(index) 3139 return None 3140 3141 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3142 if nested and self._match(TokenType.LT): 3143 if is_struct: 3144 expressions = self._parse_csv(self._parse_struct_types) 3145 else: 3146 expressions = self._parse_csv( 3147 lambda: self._parse_types(check_func=check_func, schema=schema) 3148 ) 3149 3150 if not self._match(TokenType.GT): 3151 self.raise_error("Expecting >") 3152 3153 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3154 values = self._parse_csv(self._parse_conjunction) 3155 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3156 3157 value: t.Optional[exp.Expression] = None 3158 if type_token in self.TIMESTAMPS: 3159 if self._match_text_seq("WITH", "TIME", "ZONE"): 3160 maybe_func = False 3161 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3162 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3163 maybe_func = False 3164 value = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3165 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3166 maybe_func = False 3167 elif type_token == TokenType.INTERVAL: 3168 unit = self._parse_var() 3169 3170 if not unit: 3171 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3172 else: 3173 value = self.expression(exp.Interval, unit=unit) 3174 3175 if maybe_func and check_func: 3176 index2 = self._index 3177 peek = self._parse_string() 3178 3179 if not peek: 3180 self._retreat(index) 3181 return None 3182 3183 self._retreat(index2) 3184 3185 if value: 3186 return value 3187 3188 return exp.DataType( 3189 this=exp.DataType.Type[type_token.value], 3190 expressions=expressions, 3191 nested=nested, 3192 values=values, 3193 prefix=prefix, 3194 ) 3195 3196 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3197 this = self._parse_type() or self._parse_id_var() 3198 self._match(TokenType.COLON) 3199 return self._parse_column_def(this) 3200 3201 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3202 if not self._match_text_seq("AT", "TIME", "ZONE"): 3203 return this 3204 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3205 3206 def _parse_column(self) -> t.Optional[exp.Expression]: 3207 this = self._parse_field() 3208 if isinstance(this, exp.Identifier): 3209 this = self.expression(exp.Column, this=this) 3210 elif not this: 3211 return self._parse_bracket(this) 3212 return self._parse_column_ops(this) 3213 3214 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3215 this = self._parse_bracket(this) 3216 3217 while self._match_set(self.COLUMN_OPERATORS): 3218 op_token = self._prev.token_type 3219 op = self.COLUMN_OPERATORS.get(op_token) 3220 3221 if op_token == TokenType.DCOLON: 3222 field = self._parse_types() 3223 if not field: 3224 self.raise_error("Expected type") 3225 elif op and self._curr: 
3226 self._advance() 3227 value = self._prev.text 3228 field = ( 3229 exp.Literal.number(value) 3230 if self._prev.token_type == TokenType.NUMBER 3231 else exp.Literal.string(value) 3232 ) 3233 else: 3234 field = self._parse_field(anonymous_func=True, any_token=True) 3235 3236 if isinstance(field, exp.Func): 3237 # bigquery allows function calls like x.y.count(...) 3238 # SAFE.SUBSTR(...) 3239 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3240 this = self._replace_columns_with_dots(this) 3241 3242 if op: 3243 this = op(self, this, field) 3244 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3245 this = self.expression( 3246 exp.Column, 3247 this=field, 3248 table=this.this, 3249 db=this.args.get("table"), 3250 catalog=this.args.get("db"), 3251 ) 3252 else: 3253 this = self.expression(exp.Dot, this=this, expression=field) 3254 this = self._parse_bracket(this) 3255 return this 3256 3257 def _parse_primary(self) -> t.Optional[exp.Expression]: 3258 if self._match_set(self.PRIMARY_PARSERS): 3259 token_type = self._prev.token_type 3260 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3261 3262 if token_type == TokenType.STRING: 3263 expressions = [primary] 3264 while self._match(TokenType.STRING): 3265 expressions.append(exp.Literal.string(self._prev.text)) 3266 3267 if len(expressions) > 1: 3268 return self.expression(exp.Concat, expressions=expressions) 3269 3270 return primary 3271 3272 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3273 return exp.Literal.number(f"0.{self._prev.text}") 3274 3275 if self._match(TokenType.L_PAREN): 3276 comments = self._prev_comments 3277 query = self._parse_select() 3278 3279 if query: 3280 expressions = [query] 3281 else: 3282 expressions = self._parse_expressions() 3283 3284 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3285 3286 if isinstance(this, exp.Subqueryable): 3287 this = self._parse_set_operations( 3288 
self._parse_subquery(this=this, parse_alias=False) 3289 ) 3290 elif len(expressions) > 1: 3291 this = self.expression(exp.Tuple, expressions=expressions) 3292 else: 3293 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3294 3295 if this: 3296 this.add_comments(comments) 3297 3298 self._match_r_paren(expression=this) 3299 return this 3300 3301 return None 3302 3303 def _parse_field( 3304 self, 3305 any_token: bool = False, 3306 tokens: t.Optional[t.Collection[TokenType]] = None, 3307 anonymous_func: bool = False, 3308 ) -> t.Optional[exp.Expression]: 3309 return ( 3310 self._parse_primary() 3311 or self._parse_function(anonymous=anonymous_func) 3312 or self._parse_id_var(any_token=any_token, tokens=tokens) 3313 ) 3314 3315 def _parse_function( 3316 self, 3317 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3318 anonymous: bool = False, 3319 optional_parens: bool = True, 3320 ) -> t.Optional[exp.Expression]: 3321 if not self._curr: 3322 return None 3323 3324 token_type = self._curr.token_type 3325 3326 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3327 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3328 3329 if not self._next or self._next.token_type != TokenType.L_PAREN: 3330 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3331 self._advance() 3332 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3333 3334 return None 3335 3336 if token_type not in self.FUNC_TOKENS: 3337 return None 3338 3339 this = self._curr.text 3340 upper = this.upper() 3341 self._advance(2) 3342 3343 parser = self.FUNCTION_PARSERS.get(upper) 3344 3345 if parser and not anonymous: 3346 this = parser(self) 3347 else: 3348 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3349 3350 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3351 this = self.expression(subquery_predicate, this=self._parse_select()) 3352 self._match_r_paren() 3353 return this 3354 3355 if 
functions is None: 3356 functions = self.FUNCTIONS 3357 3358 function = functions.get(upper) 3359 3360 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3361 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3362 3363 if function and not anonymous: 3364 func = self.validate_expression(function(args), args) 3365 if not self.NORMALIZE_FUNCTIONS: 3366 func.meta["name"] = this 3367 this = func 3368 else: 3369 this = self.expression(exp.Anonymous, this=this, expressions=args) 3370 3371 self._match(TokenType.R_PAREN, expression=this) 3372 return self._parse_window(this) 3373 3374 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3375 return self._parse_column_def(self._parse_id_var()) 3376 3377 def _parse_user_defined_function( 3378 self, kind: t.Optional[TokenType] = None 3379 ) -> t.Optional[exp.Expression]: 3380 this = self._parse_id_var() 3381 3382 while self._match(TokenType.DOT): 3383 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3384 3385 if not self._match(TokenType.L_PAREN): 3386 return this 3387 3388 expressions = self._parse_csv(self._parse_function_parameter) 3389 self._match_r_paren() 3390 return self.expression( 3391 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3392 ) 3393 3394 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3395 literal = self._parse_primary() 3396 if literal: 3397 return self.expression(exp.Introducer, this=token.text, expression=literal) 3398 3399 return self.expression(exp.Identifier, this=token.text) 3400 3401 def _parse_session_parameter(self) -> exp.SessionParameter: 3402 kind = None 3403 this = self._parse_id_var() or self._parse_primary() 3404 3405 if this and self._match(TokenType.DOT): 3406 kind = this.name 3407 this = self._parse_var() or self._parse_primary() 3408 3409 return self.expression(exp.SessionParameter, this=this, kind=kind) 3410 3411 def _parse_lambda(self, alias: bool = False) -> 
t.Optional[exp.Expression]:
        """
        Try to parse a lambda, e.g. `(x, y) -> x + y`. If no lambda operator
        follows the (optional) argument list, rewind and instead parse
        DISTINCT or a plain select-or-expression, with optional trailing
        ORDER BY / LIMIT.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator followed: undo the argument parsing above.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """
        Parse a parenthesized schema body (constraints and column defs)
        attached to `this`. If a nested SELECT can be parsed instead, the
        stream position is restored (in the `finally` below) and `this` is
        returned unchanged, leaving the SELECT for the caller.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Runs on both paths: discard speculative errors and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """
        Parse a column definition: an optional type followed by any number of
        column constraints. Returns `this` unchanged when neither is present.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)
3479 if not kind and not constraints: 3480 return this 3481 3482 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3483 3484 def _parse_auto_increment( 3485 self, 3486 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3487 start = None 3488 increment = None 3489 3490 if self._match(TokenType.L_PAREN, advance=False): 3491 args = self._parse_wrapped_csv(self._parse_bitwise) 3492 start = seq_get(args, 0) 3493 increment = seq_get(args, 1) 3494 elif self._match_text_seq("START"): 3495 start = self._parse_bitwise() 3496 self._match_text_seq("INCREMENT") 3497 increment = self._parse_bitwise() 3498 3499 if start and increment: 3500 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3501 3502 return exp.AutoIncrementColumnConstraint() 3503 3504 def _parse_compress(self) -> exp.CompressColumnConstraint: 3505 if self._match(TokenType.L_PAREN, advance=False): 3506 return self.expression( 3507 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3508 ) 3509 3510 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3511 3512 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3513 if self._match_text_seq("BY", "DEFAULT"): 3514 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3515 this = self.expression( 3516 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3517 ) 3518 else: 3519 self._match_text_seq("ALWAYS") 3520 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3521 3522 self._match(TokenType.ALIAS) 3523 identity = self._match_text_seq("IDENTITY") 3524 3525 if self._match(TokenType.L_PAREN): 3526 if self._match_text_seq("START", "WITH"): 3527 this.set("start", self._parse_bitwise()) 3528 if self._match_text_seq("INCREMENT", "BY"): 3529 this.set("increment", self._parse_bitwise()) 3530 if self._match_text_seq("MINVALUE"): 3531 
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): without IDENTITY, the parenthesized
                # body is a generation expression rather than identity options.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint (LENGTH optional)."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """
        Parse the remainder of a constraint that started with NOT:
        NOT NULL or NOT CASESPECIFIC. Returns None when neither follows.
        """
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """
        Parse one column constraint, optionally prefixed by `CONSTRAINT <name>`.
        Dispatches on CONSTRAINT_PARSERS; when no known constraint keyword
        follows, returns just the parsed name (or None).
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """
        Parse a (possibly named) schema-level constraint. Without a leading
        CONSTRAINT keyword, falls back to the unnamed-constraint parser with
        the schema-specific keyword set.
        """
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several constraint kinds in sequence.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] =
        None
    ) -> t.Optional[exp.Expression]:
        """
        Parse a constraint with no `CONSTRAINT <name>` prefix, restricted to
        the given keyword collection (defaults to CONSTRAINT_PARSERS).
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            # `constraints` may be a subset keyed differently; guard the lookup.
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(<columns>)] into a UniqueColumnConstraint."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """
        Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED, NORELY, MATCH FULL)
        as plain strings, in source order, until no option matches.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event token (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """
        Parse a REFERENCES clause (target table plus key-constraint options).
        When `match` is True, returns None unless REFERENCES is present.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is always None here; it is passed through
        # only to keep the Reference node's arg shape uniform.
        expressions = None
        this = self._parse_table(schema=True)
        options =
self._parse_key_constraint_options() 3652 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3653 3654 def _parse_foreign_key(self) -> exp.ForeignKey: 3655 expressions = self._parse_wrapped_id_vars() 3656 reference = self._parse_references() 3657 options = {} 3658 3659 while self._match(TokenType.ON): 3660 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3661 self.raise_error("Expected DELETE or UPDATE") 3662 3663 kind = self._prev.text.lower() 3664 3665 if self._match_text_seq("NO", "ACTION"): 3666 action = "NO ACTION" 3667 elif self._match(TokenType.SET): 3668 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3669 action = "SET " + self._prev.text.upper() 3670 else: 3671 self._advance() 3672 action = self._prev.text.upper() 3673 3674 options[kind] = action 3675 3676 return self.expression( 3677 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3678 ) 3679 3680 def _parse_primary_key( 3681 self, wrapped_optional: bool = False, in_props: bool = False 3682 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3683 desc = ( 3684 self._match_set((TokenType.ASC, TokenType.DESC)) 3685 and self._prev.token_type == TokenType.DESC 3686 ) 3687 3688 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3689 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3690 3691 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3692 options = self._parse_key_constraint_options() 3693 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3694 3695 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3696 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3697 return this 3698 3699 bracket_kind = self._prev.token_type 3700 3701 if self._match(TokenType.COLON): 3702 expressions: t.List[t.Optional[exp.Expression]] = [ 3703 self.expression(exp.Slice, 
                                expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Indexing into `this`: compensate literal indexes for the
            # dialect's INDEX_OFFSET before building the Bracket node.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Bracket accesses can be chained, e.g. x[1][2] — recurse.
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:<expr>` slice suffix following `this`."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """
        Parse a CASE expression:
        CASE [<operand>] WHEN <cond> THEN <result> ... [ELSE <default>] END,
        optionally followed by a window clause.
        """
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form (CASE x WHEN ...).
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """
        Parse IF in either its function form `IF(<cond>, <true>[, <false>])`
        or the keyword form `IF <cond> THEN <true> [ELSE <false>] END`.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all: rewind to the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """
        Parse the body of EXTRACT: `<part> FROM <expr>` or the comma-separated
        variant `<part>, <expr>`. Raises when neither FROM nor a comma follows.
        """
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """
        Parse the body of CAST/TRY_CAST: `<expr> AS <type> [FORMAT <fmt>]`.
        The comma form `CAST(<expr>, '<type string>')` becomes CastToStrType.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to =
self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3815 elif self._match(TokenType.FORMAT): 3816 fmt_string = self._parse_string() 3817 fmt = self._parse_at_time_zone(fmt_string) 3818 3819 if to.this in exp.DataType.TEMPORAL_TYPES: 3820 this = self.expression( 3821 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3822 this=this, 3823 format=exp.Literal.string( 3824 format_time( 3825 fmt_string.this if fmt_string else "", 3826 self.FORMAT_MAPPING or self.TIME_MAPPING, 3827 self.FORMAT_TRIE or self.TIME_TRIE, 3828 ) 3829 ), 3830 ) 3831 3832 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3833 this.set("zone", fmt.args["zone"]) 3834 3835 return this 3836 3837 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3838 3839 def _parse_concat(self) -> t.Optional[exp.Expression]: 3840 args = self._parse_csv(self._parse_conjunction) 3841 if self.CONCAT_NULL_OUTPUTS_STRING: 3842 args = [ 3843 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3844 for arg in args 3845 if arg 3846 ] 3847 3848 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3849 # we find such a call we replace it with its argument. 
3850 if len(args) == 1: 3851 return args[0] 3852 3853 return self.expression( 3854 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3855 ) 3856 3857 def _parse_string_agg(self) -> exp.Expression: 3858 if self._match(TokenType.DISTINCT): 3859 args: t.List[t.Optional[exp.Expression]] = [ 3860 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3861 ] 3862 if self._match(TokenType.COMMA): 3863 args.extend(self._parse_csv(self._parse_conjunction)) 3864 else: 3865 args = self._parse_csv(self._parse_conjunction) 3866 3867 index = self._index 3868 if not self._match(TokenType.R_PAREN) and args: 3869 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3870 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3871 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3872 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3873 3874 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3875 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3876 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """
        Parse the body of CONVERT: `<expr> USING <charset>` or `<expr>, <type>`.
        Produces a Cast (TryCast when `strict` is False); `to` may be None when
        neither form matches.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3909 """ 3910 args = self._parse_csv(self._parse_conjunction) 3911 3912 if len(args) < 3: 3913 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3914 3915 expression, *expressions = args 3916 if not expression: 3917 return None 3918 3919 ifs = [] 3920 for search, result in zip(expressions[::2], expressions[1::2]): 3921 if not search or not result: 3922 return None 3923 3924 if isinstance(search, exp.Literal): 3925 ifs.append( 3926 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3927 ) 3928 elif isinstance(search, exp.Null): 3929 ifs.append( 3930 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3931 ) 3932 else: 3933 cond = exp.or_( 3934 exp.EQ(this=expression.copy(), expression=search), 3935 exp.and_( 3936 exp.Is(this=expression.copy(), expression=exp.Null()), 3937 exp.Is(this=search.copy(), expression=exp.Null()), 3938 copy=False, 3939 ), 3940 copy=False, 3941 ) 3942 ifs.append(exp.If(this=cond, true=result)) 3943 3944 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3945 3946 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3947 self._match_text_seq("KEY") 3948 key = self._parse_field() 3949 self._match(TokenType.COLON) 3950 self._match_text_seq("VALUE") 3951 value = self._parse_field() 3952 3953 if not key and not value: 3954 return None 3955 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3956 3957 def _parse_json_object(self) -> exp.JSONObject: 3958 star = self._parse_star() 3959 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3960 3961 null_handling = None 3962 if self._match_text_seq("NULL", "ON", "NULL"): 3963 null_handling = "NULL ON NULL" 3964 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3965 null_handling = "ABSENT ON NULL" 3966 3967 unique_keys = None 3968 if self._match_text_seq("WITH", "UNIQUE"): 3969 unique_keys = True 3970 elif 
self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """
        Parse LOG arguments. With two or more arguments the canonical order is
        (base, expression); dialects without LOG_BASE_FIRST are reversed. A
        single argument maps to LN when LOG_DEFAULTS_TO_LN is set, else LOG.
        """
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """
        Parse MySQL-style full-text search:
        MATCH (<columns>) AGAINST (<string> [<search modifier>]).
        """
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(<json> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4030 this = self._parse_field(any_token=True) 4031 kind = self._parse_types() 4032 path = self._parse_string() 4033 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4034 4035 return self.expression( 4036 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4037 ) 4038 4039 expressions = None 4040 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4041 self._match_l_paren() 4042 expressions = self._parse_csv(_parse_open_json_column_def) 4043 4044 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4045 4046 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4047 args = self._parse_csv(self._parse_bitwise) 4048 4049 if self._match(TokenType.IN): 4050 return self.expression( 4051 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4052 ) 4053 4054 if haystack_first: 4055 haystack = seq_get(args, 0) 4056 needle = seq_get(args, 1) 4057 else: 4058 needle = seq_get(args, 0) 4059 haystack = seq_get(args, 1) 4060 4061 return self.expression( 4062 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4063 ) 4064 4065 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4066 args = self._parse_csv(self._parse_table) 4067 return exp.JoinHint(this=func_name.upper(), expressions=args) 4068 4069 def _parse_substring(self) -> exp.Substring: 4070 # Postgres supports the form: substring(string [from int] [for int]) 4071 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4072 4073 args = self._parse_csv(self._parse_bitwise) 4074 4075 if self._match(TokenType.FROM): 4076 args.append(self._parse_bitwise()) 4077 if self._match(TokenType.FOR): 4078 args.append(self._parse_bitwise()) 4079 4080 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4081 4082 def _parse_trim(self) -> exp.Trim: 4083 # https://www.w3resource.com/sql/character-functions/trim.php 4084 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4085 4086 position = None 4087 collation = None 4088 4089 if self._match_texts(self.TRIM_TYPES): 4090 position = self._prev.text.upper() 4091 4092 expression = self._parse_bitwise() 4093 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4094 this = self._parse_bitwise() 4095 else: 4096 this = expression 4097 expression = None 4098 4099 if self._match(TokenType.COLLATE): 4100 collation = self._parse_bitwise() 4101 4102 return self.expression( 4103 exp.Trim, this=this, position=position, expression=expression, collation=collation 4104 ) 4105 4106 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4107 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4108 4109 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4110 return self._parse_window(self._parse_id_var(), alias=True) 4111 4112 def _parse_respect_or_ignore_nulls( 4113 self, this: t.Optional[exp.Expression] 4114 ) -> t.Optional[exp.Expression]: 4115 if self._match_text_seq("IGNORE", "NULLS"): 4116 return self.expression(exp.IgnoreNulls, this=this) 4117 if self._match_text_seq("RESPECT", "NULLS"): 4118 return self.expression(exp.RespectNulls, this=this) 4119 return this 4120 4121 def _parse_window( 4122 self, this: t.Optional[exp.Expression], alias: bool = False 4123 ) -> t.Optional[exp.Expression]: 4124 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4125 self._match(TokenType.WHERE) 4126 this = self.expression( 4127 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4128 ) 4129 self._match_r_paren() 4130 4131 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4132 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4133 if self._match_text_seq("WITHIN", "GROUP"): 4134 order = self._parse_wrapped(self._parse_order) 4135 this = self.expression(exp.WithinGroup, this=this, expression=order) 4136 4137 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4138 # Some dialects choose to implement and some do not. 4139 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4140 4141 # There is some code above in _parse_lambda that handles 4142 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4143 4144 # The below changes handle 4145 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4146 4147 # Oracle allows both formats 4148 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4149 # and Snowflake chose to do the same for familiarity 4150 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4151 this = self._parse_respect_or_ignore_nulls(this) 4152 4153 # bigquery select from window x AS (partition by ...) 
4154 if alias: 4155 over = None 4156 self._match(TokenType.ALIAS) 4157 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4158 return this 4159 else: 4160 over = self._prev.text.upper() 4161 4162 if not self._match(TokenType.L_PAREN): 4163 return self.expression( 4164 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4165 ) 4166 4167 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4168 4169 first = self._match(TokenType.FIRST) 4170 if self._match_text_seq("LAST"): 4171 first = False 4172 4173 partition = self._parse_partition_by() 4174 order = self._parse_order() 4175 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4176 4177 if kind: 4178 self._match(TokenType.BETWEEN) 4179 start = self._parse_window_spec() 4180 self._match(TokenType.AND) 4181 end = self._parse_window_spec() 4182 4183 spec = self.expression( 4184 exp.WindowSpec, 4185 kind=kind, 4186 start=start["value"], 4187 start_side=start["side"], 4188 end=end["value"], 4189 end_side=end["side"], 4190 ) 4191 else: 4192 spec = None 4193 4194 self._match_r_paren() 4195 4196 window = self.expression( 4197 exp.Window, 4198 this=this, 4199 partition_by=partition, 4200 order=order, 4201 spec=spec, 4202 alias=window_alias, 4203 over=over, 4204 first=first, 4205 ) 4206 4207 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4208 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4209 return self._parse_window(window, alias=alias) 4210 4211 return window 4212 4213 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4214 self._match(TokenType.BETWEEN) 4215 4216 return { 4217 "value": ( 4218 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4219 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4220 or self._parse_bitwise() 4221 ), 4222 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4223 } 4224 4225 def _parse_alias( 4226 self, this: t.Optional[exp.Expression], explicit: bool = False 4227 ) -> t.Optional[exp.Expression]: 4228 any_token = self._match(TokenType.ALIAS) 4229 4230 if explicit and not any_token: 4231 return this 4232 4233 if self._match(TokenType.L_PAREN): 4234 aliases = self.expression( 4235 exp.Aliases, 4236 this=this, 4237 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4238 ) 4239 self._match_r_paren(aliases) 4240 return aliases 4241 4242 alias = self._parse_id_var(any_token) 4243 4244 if alias: 4245 return self.expression(exp.Alias, this=this, alias=alias) 4246 4247 return this 4248 4249 def _parse_id_var( 4250 self, 4251 any_token: bool = True, 4252 tokens: t.Optional[t.Collection[TokenType]] = None, 4253 ) -> t.Optional[exp.Expression]: 4254 identifier = self._parse_identifier() 4255 4256 if identifier: 4257 return identifier 4258 4259 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4260 quoted = self._prev.token_type == TokenType.STRING 4261 return exp.Identifier(this=self._prev.text, quoted=quoted) 4262 4263 return None 4264 4265 def _parse_string(self) -> t.Optional[exp.Expression]: 4266 if self._match(TokenType.STRING): 4267 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4268 return self._parse_placeholder() 4269 4270 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4271 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved/extra `tokens`) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a variable or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, or fall back to a placeholder."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, or fall back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, or fall back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS, rewinding on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser produced nothing: undo the token we consumed.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (col, ...) / EXCEPT col, ... column list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr, ...) / REPLACE expr, ... expression list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments following the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; require them unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] with optional mode words."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Collect comma-separated transaction modes, each a run of VAR tokens.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback =
self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN], defaulting the kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a comma-separated list of PARTITION specs being dropped."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY clauses."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action of ALTER TABLE: constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as column additions instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP action of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as column drops instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE, falling back to a raw Command for unsupported forms."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce a structured AlterTable when all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via registered SHOW parsers, else a generic Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` SET item; rewind on no match."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via registered parsers, else as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a raw Command if tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the given (possibly multi-word) options into a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remaining tokens and wrap the raw SQL in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: kind plus optional (key value, ...) settings."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary layout range: (MIN x MAX y) or (MAX y) with MIN 0."""
        # NOTE: `min`/`max` shadow the builtins here; kept for interface stability.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the token stream through `trie` to find a multi-word sub-parser.

        Rewinds the cursor if no registered key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (advancing by default) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True (advancing by default) if the current token is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True (advancing past both by default) if the next two tokens match."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(` token; raise a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)` token; raise a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
    def _match_texts(self, texts, advance=True):
        """Return True (advancing by default) if the current token's upper-cased
        text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Return True if the upcoming tokens match `texts` word-for-word
        (case-insensitive).

        On a partial mismatch the cursor is restored to where it started; with
        `advance=False` it is restored even on success (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column -> Dot)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # With a table qualifier, build table.column as a Dot; otherwise
            # unwrap the column to its bare identifier.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite references to lambda parameters inside a lambda body.

        Columns whose leading part names a lambda variable are replaced in
        place by a Dot chain (if table-qualified) or a bare identifier.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any; the
                # while/else falls through when the column is not nested
                # inside a Dot at all.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument yields a StarMap; otherwise the arguments are
    consumed pairwise as key, value, key, value, ... and packed into a
    VarMap of two parallel arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        # Pairwise indexing: an odd number of arguments raises IndexError
        # here, matching the original behavior.
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps SQL function names to builder callables. The defaults come from
    # exp.ALL_FUNCTIONS (via each class' from_arg_list); the explicit entries
    # below override or extend those defaults.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Note: GLOB's arguments are swapped (pattern first in SQL, subject first here).
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses (e.g. CURRENT_DATE).
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate here — confirm this
    # is intentional and not meant to be a dedicated CURRENT_DATETIME expression.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Type tokens that can contain other types (e.g. ARRAY<INT>).
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    # All tokens that can denote a data type (includes the nested types above).
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that can wrap a subquery (e.g. ANY (SELECT ...)).
    # SOME is an alias of ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Objects that can be created at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    # Everything that can follow CREATE / DROP / COMMENT ON.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens allowed as table aliases; tokens that would be
    # ambiguous after a table reference (e.g. LEFT, NATURAL) are removed.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list, i.e. can
    # introduce a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator token -> expression-class tables, one per precedence tier.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Builders for lambda-like syntaxes: ARROW produces an exp.Lambda, FARROW
    # a keyword argument (exp.Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can follow a column reference. DOT maps to None because
    # member access is handled separately by the column parser.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Used by parse_into to map a target Expression type to its parse method.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Dispatch table for top-level statements, keyed by their leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        # A bare FROM clause is treated as SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Builders for primary (literal-like) expressions; each receives the
    # matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # ":name" / ":1" style placeholders — only when followed by NUMBER or VAR.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Parsers for range/predicate operators (BETWEEN, IN, LIKE, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed by the property keyword (possibly multi-word).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed by the constraint keyword.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Parsers for ALTER TABLE actions.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that don't use regular call syntax (CASE, IF, ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing (special syntax).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Each entry yields an (arg_name, parsed_expression) pair used to fill in
    # the modifiers of a query (WHERE, GROUP BY, LIMIT, ...).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether "::" casts produce exp.Cast (strict) or exp.TryCast.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so the instance can be reused for a new input."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; chain the last failure for context.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies parse_method to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m escape underlines the offending SQL in terminal output.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Without explicit comments, attach any pending comments from the last token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Transfers the buffered comments of the previous token onto `expression`."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanning the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor `times` tokens forward and refreshes _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back (or forward) to the absolute token `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fallback: wraps the previous token plus an optional string into a raw Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>; falls back to a Command."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the target table of a TO property."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse-style TTL property into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, dispatching through STATEMENT_PARSERS first."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise treat the input as an expression / SELECT with modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses a DROP statement; unrecognized targets fall back to a raw Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value only when fully matched."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE [OR REPLACE] statement; falls back to a raw Command."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at the various syntactic locations.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
clone = self._parse_table(schema=True) 1268 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1269 clone_kind = ( 1270 self._match(TokenType.L_PAREN) 1271 and self._match_texts(self.CLONE_KINDS) 1272 and self._prev.text.upper() 1273 ) 1274 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1275 self._match(TokenType.R_PAREN) 1276 clone = self.expression( 1277 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1278 ) 1279 1280 return self.expression( 1281 exp.Create, 1282 this=this, 1283 kind=create_token.text, 1284 replace=replace, 1285 unique=unique, 1286 expression=expression, 1287 exists=exists, 1288 properties=properties, 1289 indexes=indexes, 1290 no_schema_binding=no_schema_binding, 1291 begin=begin, 1292 clone=clone, 1293 ) 1294 1295 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1296 # only used for teradata currently 1297 self._match(TokenType.COMMA) 1298 1299 kwargs = { 1300 "no": self._match_text_seq("NO"), 1301 "dual": self._match_text_seq("DUAL"), 1302 "before": self._match_text_seq("BEFORE"), 1303 "default": self._match_text_seq("DEFAULT"), 1304 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1305 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1306 "after": self._match_text_seq("AFTER"), 1307 "minimum": self._match_texts(("MIN", "MINIMUM")), 1308 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1309 } 1310 1311 if self._match_texts(self.PROPERTY_PARSERS): 1312 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1313 try: 1314 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1315 except TypeError: 1316 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1317 1318 return None 1319 1320 def _parse_property(self) -> t.Optional[exp.Expression]: 1321 if self._match_texts(self.PROPERTY_PARSERS): 1322 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1323 1324 if self._match_pair(TokenType.DEFAULT, 
TokenType.CHARACTER_SET): 1325 return self._parse_character_set(default=True) 1326 1327 if self._match_text_seq("COMPOUND", "SORTKEY"): 1328 return self._parse_sortkey(compound=True) 1329 1330 if self._match_text_seq("SQL", "SECURITY"): 1331 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1332 1333 assignment = self._match_pair( 1334 TokenType.VAR, TokenType.EQ, advance=False 1335 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1336 1337 if assignment: 1338 key = self._parse_var_or_string() 1339 self._match(TokenType.EQ) 1340 return self.expression(exp.Property, this=key, value=self._parse_column()) 1341 1342 return None 1343 1344 def _parse_stored(self) -> exp.FileFormatProperty: 1345 self._match(TokenType.ALIAS) 1346 1347 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1348 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1349 1350 return self.expression( 1351 exp.FileFormatProperty, 1352 this=self.expression( 1353 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1354 ) 1355 if input_format or output_format 1356 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1357 ) 1358 1359 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1360 self._match(TokenType.EQ) 1361 self._match(TokenType.ALIAS) 1362 return self.expression(exp_class, this=self._parse_field()) 1363 1364 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1365 properties = [] 1366 while True: 1367 if before: 1368 prop = self._parse_property_before() 1369 else: 1370 prop = self._parse_property() 1371 1372 if not prop: 1373 break 1374 for p in ensure_list(prop): 1375 properties.append(p) 1376 1377 if properties: 1378 return self.expression(exp.Properties, expressions=properties) 1379 1380 return None 1381 1382 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty: 1383 return self.expression( 1384 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1385 ) 1386 1387 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1388 if self._index >= 2: 1389 pre_volatile_token = self._tokens[self._index - 2] 1390 else: 1391 pre_volatile_token = None 1392 1393 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1394 return exp.VolatileProperty() 1395 1396 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1397 1398 def _parse_with_property( 1399 self, 1400 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1401 if self._match(TokenType.L_PAREN, advance=False): 1402 return self._parse_wrapped_csv(self._parse_property) 1403 1404 if self._match_text_seq("JOURNAL"): 1405 return self._parse_withjournaltable() 1406 1407 if self._match_text_seq("DATA"): 1408 return self._parse_withdata(no=False) 1409 elif self._match_text_seq("NO", "DATA"): 1410 return self._parse_withdata(no=True) 1411 1412 if not self._next: 1413 return None 1414 1415 return self._parse_withisolatedloading() 1416 1417 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1418 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1419 self._match(TokenType.EQ) 1420 1421 user = self._parse_id_var() 1422 self._match(TokenType.PARAMETER) 1423 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1424 1425 if not user or not host: 1426 return None 1427 1428 return exp.DefinerProperty(this=f"{user}@{host}") 1429 1430 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1431 self._match(TokenType.TABLE) 1432 self._match(TokenType.EQ) 1433 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1434 1435 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1436 return self.expression(exp.LogProperty, no=no) 1437 1438 def 
_parse_journal(self, **kwargs) -> exp.JournalProperty: 1439 return self.expression(exp.JournalProperty, **kwargs) 1440 1441 def _parse_checksum(self) -> exp.ChecksumProperty: 1442 self._match(TokenType.EQ) 1443 1444 on = None 1445 if self._match(TokenType.ON): 1446 on = True 1447 elif self._match_text_seq("OFF"): 1448 on = False 1449 1450 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1451 1452 def _parse_cluster(self) -> exp.Cluster: 1453 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1454 1455 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1456 self._match_text_seq("BY") 1457 1458 self._match_l_paren() 1459 expressions = self._parse_csv(self._parse_column) 1460 self._match_r_paren() 1461 1462 if self._match_text_seq("SORTED", "BY"): 1463 self._match_l_paren() 1464 sorted_by = self._parse_csv(self._parse_ordered) 1465 self._match_r_paren() 1466 else: 1467 sorted_by = None 1468 1469 self._match(TokenType.INTO) 1470 buckets = self._parse_number() 1471 self._match_text_seq("BUCKETS") 1472 1473 return self.expression( 1474 exp.ClusteredByProperty, 1475 expressions=expressions, 1476 sorted_by=sorted_by, 1477 buckets=buckets, 1478 ) 1479 1480 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1481 if not self._match_text_seq("GRANTS"): 1482 self._retreat(self._index - 1) 1483 return None 1484 1485 return self.expression(exp.CopyGrantsProperty) 1486 1487 def _parse_freespace(self) -> exp.FreespaceProperty: 1488 self._match(TokenType.EQ) 1489 return self.expression( 1490 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1491 ) 1492 1493 def _parse_mergeblockratio( 1494 self, no: bool = False, default: bool = False 1495 ) -> exp.MergeBlockRatioProperty: 1496 if self._match(TokenType.EQ): 1497 return self.expression( 1498 exp.MergeBlockRatioProperty, 1499 this=self._parse_number(), 1500 
percent=self._match(TokenType.PERCENT), 1501 ) 1502 1503 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1504 1505 def _parse_datablocksize( 1506 self, 1507 default: t.Optional[bool] = None, 1508 minimum: t.Optional[bool] = None, 1509 maximum: t.Optional[bool] = None, 1510 ) -> exp.DataBlocksizeProperty: 1511 self._match(TokenType.EQ) 1512 size = self._parse_number() 1513 1514 units = None 1515 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1516 units = self._prev.text 1517 1518 return self.expression( 1519 exp.DataBlocksizeProperty, 1520 size=size, 1521 units=units, 1522 default=default, 1523 minimum=minimum, 1524 maximum=maximum, 1525 ) 1526 1527 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1528 self._match(TokenType.EQ) 1529 always = self._match_text_seq("ALWAYS") 1530 manual = self._match_text_seq("MANUAL") 1531 never = self._match_text_seq("NEVER") 1532 default = self._match_text_seq("DEFAULT") 1533 1534 autotemp = None 1535 if self._match_text_seq("AUTOTEMP"): 1536 autotemp = self._parse_schema() 1537 1538 return self.expression( 1539 exp.BlockCompressionProperty, 1540 always=always, 1541 manual=manual, 1542 never=never, 1543 default=default, 1544 autotemp=autotemp, 1545 ) 1546 1547 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1548 no = self._match_text_seq("NO") 1549 concurrent = self._match_text_seq("CONCURRENT") 1550 self._match_text_seq("ISOLATED", "LOADING") 1551 for_all = self._match_text_seq("FOR", "ALL") 1552 for_insert = self._match_text_seq("FOR", "INSERT") 1553 for_none = self._match_text_seq("FOR", "NONE") 1554 return self.expression( 1555 exp.IsolatedLoadingProperty, 1556 no=no, 1557 concurrent=concurrent, 1558 for_all=for_all, 1559 for_insert=for_insert, 1560 for_none=for_none, 1561 ) 1562 1563 def _parse_locking(self) -> exp.LockingProperty: 1564 if self._match(TokenType.TABLE): 1565 kind = "TABLE" 1566 elif self._match(TokenType.VIEW): 1567 kind = "VIEW" 1568 
elif self._match(TokenType.ROW): 1569 kind = "ROW" 1570 elif self._match_text_seq("DATABASE"): 1571 kind = "DATABASE" 1572 else: 1573 kind = None 1574 1575 if kind in ("DATABASE", "TABLE", "VIEW"): 1576 this = self._parse_table_parts() 1577 else: 1578 this = None 1579 1580 if self._match(TokenType.FOR): 1581 for_or_in = "FOR" 1582 elif self._match(TokenType.IN): 1583 for_or_in = "IN" 1584 else: 1585 for_or_in = None 1586 1587 if self._match_text_seq("ACCESS"): 1588 lock_type = "ACCESS" 1589 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1590 lock_type = "EXCLUSIVE" 1591 elif self._match_text_seq("SHARE"): 1592 lock_type = "SHARE" 1593 elif self._match_text_seq("READ"): 1594 lock_type = "READ" 1595 elif self._match_text_seq("WRITE"): 1596 lock_type = "WRITE" 1597 elif self._match_text_seq("CHECKSUM"): 1598 lock_type = "CHECKSUM" 1599 else: 1600 lock_type = None 1601 1602 override = self._match_text_seq("OVERRIDE") 1603 1604 return self.expression( 1605 exp.LockingProperty, 1606 this=this, 1607 kind=kind, 1608 for_or_in=for_or_in, 1609 lock_type=lock_type, 1610 override=override, 1611 ) 1612 1613 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1614 if self._match(TokenType.PARTITION_BY): 1615 return self._parse_csv(self._parse_conjunction) 1616 return [] 1617 1618 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1619 self._match(TokenType.EQ) 1620 return self.expression( 1621 exp.PartitionedByProperty, 1622 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1623 ) 1624 1625 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1626 if self._match_text_seq("AND", "STATISTICS"): 1627 statistics = True 1628 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1629 statistics = False 1630 else: 1631 statistics = None 1632 1633 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1634 1635 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1636 if 
self._match_text_seq("PRIMARY", "INDEX"): 1637 return exp.NoPrimaryIndexProperty() 1638 return None 1639 1640 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1641 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1642 return exp.OnCommitProperty() 1643 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1644 return exp.OnCommitProperty(delete=True) 1645 return None 1646 1647 def _parse_distkey(self) -> exp.DistKeyProperty: 1648 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1649 1650 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1651 table = self._parse_table(schema=True) 1652 1653 options = [] 1654 while self._match_texts(("INCLUDING", "EXCLUDING")): 1655 this = self._prev.text.upper() 1656 1657 id_var = self._parse_id_var() 1658 if not id_var: 1659 return None 1660 1661 options.append( 1662 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1663 ) 1664 1665 return self.expression(exp.LikeProperty, this=table, expressions=options) 1666 1667 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1668 return self.expression( 1669 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1670 ) 1671 1672 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1673 self._match(TokenType.EQ) 1674 return self.expression( 1675 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1676 ) 1677 1678 def _parse_returns(self) -> exp.ReturnsProperty: 1679 value: t.Optional[exp.Expression] 1680 is_table = self._match(TokenType.TABLE) 1681 1682 if is_table: 1683 if self._match(TokenType.LT): 1684 value = self.expression( 1685 exp.Schema, 1686 this="TABLE", 1687 expressions=self._parse_csv(self._parse_struct_types), 1688 ) 1689 if not self._match(TokenType.GT): 1690 self.raise_error("Expecting >") 1691 else: 1692 value = self._parse_schema(exp.var("TABLE")) 1693 else: 1694 value = 
self._parse_types() 1695 1696 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1697 1698 def _parse_describe(self) -> exp.Describe: 1699 kind = self._match_set(self.CREATABLES) and self._prev.text 1700 this = self._parse_table() 1701 return self.expression(exp.Describe, this=this, kind=kind) 1702 1703 def _parse_insert(self) -> exp.Insert: 1704 comments = ensure_list(self._prev_comments) 1705 overwrite = self._match(TokenType.OVERWRITE) 1706 ignore = self._match(TokenType.IGNORE) 1707 local = self._match_text_seq("LOCAL") 1708 alternative = None 1709 1710 if self._match_text_seq("DIRECTORY"): 1711 this: t.Optional[exp.Expression] = self.expression( 1712 exp.Directory, 1713 this=self._parse_var_or_string(), 1714 local=local, 1715 row_format=self._parse_row_format(match_row=True), 1716 ) 1717 else: 1718 if self._match(TokenType.OR): 1719 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1720 1721 self._match(TokenType.INTO) 1722 comments += ensure_list(self._prev_comments) 1723 self._match(TokenType.TABLE) 1724 this = self._parse_table(schema=True) 1725 1726 returning = self._parse_returning() 1727 1728 return self.expression( 1729 exp.Insert, 1730 comments=comments, 1731 this=this, 1732 exists=self._parse_exists(), 1733 partition=self._parse_partition(), 1734 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1735 and self._parse_conjunction(), 1736 expression=self._parse_ddl_select(), 1737 conflict=self._parse_on_conflict(), 1738 returning=returning or self._parse_returning(), 1739 overwrite=overwrite, 1740 alternative=alternative, 1741 ignore=ignore, 1742 ) 1743 1744 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1745 conflict = self._match_text_seq("ON", "CONFLICT") 1746 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1747 1748 if not conflict and not duplicate: 1749 return None 1750 1751 nothing = None 1752 expressions = None 1753 key = None 1754 constraint = None 1755 1756 if 
conflict: 1757 if self._match_text_seq("ON", "CONSTRAINT"): 1758 constraint = self._parse_id_var() 1759 else: 1760 key = self._parse_csv(self._parse_value) 1761 1762 self._match_text_seq("DO") 1763 if self._match_text_seq("NOTHING"): 1764 nothing = True 1765 else: 1766 self._match(TokenType.UPDATE) 1767 self._match(TokenType.SET) 1768 expressions = self._parse_csv(self._parse_equality) 1769 1770 return self.expression( 1771 exp.OnConflict, 1772 duplicate=duplicate, 1773 expressions=expressions, 1774 nothing=nothing, 1775 key=key, 1776 constraint=constraint, 1777 ) 1778 1779 def _parse_returning(self) -> t.Optional[exp.Returning]: 1780 if not self._match(TokenType.RETURNING): 1781 return None 1782 return self.expression( 1783 exp.Returning, 1784 expressions=self._parse_csv(self._parse_expression), 1785 into=self._match(TokenType.INTO) and self._parse_table_part(), 1786 ) 1787 1788 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1789 if not self._match(TokenType.FORMAT): 1790 return None 1791 return self._parse_row_format() 1792 1793 def _parse_row_format( 1794 self, match_row: bool = False 1795 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1796 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1797 return None 1798 1799 if self._match_text_seq("SERDE"): 1800 this = self._parse_string() 1801 1802 serde_properties = None 1803 if self._match(TokenType.SERDE_PROPERTIES): 1804 serde_properties = self.expression( 1805 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1806 ) 1807 1808 return self.expression( 1809 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1810 ) 1811 1812 self._match_text_seq("DELIMITED") 1813 1814 kwargs = {} 1815 1816 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1817 kwargs["fields"] = self._parse_string() 1818 if self._match_text_seq("ESCAPED", "BY"): 1819 kwargs["escaped"] = 
self._parse_string() 1820 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1821 kwargs["collection_items"] = self._parse_string() 1822 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1823 kwargs["map_keys"] = self._parse_string() 1824 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1825 kwargs["lines"] = self._parse_string() 1826 if self._match_text_seq("NULL", "DEFINED", "AS"): 1827 kwargs["null"] = self._parse_string() 1828 1829 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1830 1831 def _parse_load(self) -> exp.LoadData | exp.Command: 1832 if self._match_text_seq("DATA"): 1833 local = self._match_text_seq("LOCAL") 1834 self._match_text_seq("INPATH") 1835 inpath = self._parse_string() 1836 overwrite = self._match(TokenType.OVERWRITE) 1837 self._match_pair(TokenType.INTO, TokenType.TABLE) 1838 1839 return self.expression( 1840 exp.LoadData, 1841 this=self._parse_table(schema=True), 1842 local=local, 1843 overwrite=overwrite, 1844 inpath=inpath, 1845 partition=self._parse_partition(), 1846 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1847 serde=self._match_text_seq("SERDE") and self._parse_string(), 1848 ) 1849 return self._parse_as_command(self._prev) 1850 1851 def _parse_delete(self) -> exp.Delete: 1852 # This handles MySQL's "Multiple-Table Syntax" 1853 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1854 tables = None 1855 comments = self._prev_comments 1856 if not self._match(TokenType.FROM, advance=False): 1857 tables = self._parse_csv(self._parse_table) or None 1858 1859 returning = self._parse_returning() 1860 1861 return self.expression( 1862 exp.Delete, 1863 comments=comments, 1864 tables=tables, 1865 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1866 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1867 where=self._parse_where(), 1868 returning=returning or self._parse_returning(), 1869 
limit=self._parse_limit(), 1870 ) 1871 1872 def _parse_update(self) -> exp.Update: 1873 comments = self._prev_comments 1874 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1875 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1876 returning = self._parse_returning() 1877 return self.expression( 1878 exp.Update, 1879 comments=comments, 1880 **{ # type: ignore 1881 "this": this, 1882 "expressions": expressions, 1883 "from": self._parse_from(joins=True), 1884 "where": self._parse_where(), 1885 "returning": returning or self._parse_returning(), 1886 "limit": self._parse_limit(), 1887 }, 1888 ) 1889 1890 def _parse_uncache(self) -> exp.Uncache: 1891 if not self._match(TokenType.TABLE): 1892 self.raise_error("Expecting TABLE after UNCACHE") 1893 1894 return self.expression( 1895 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1896 ) 1897 1898 def _parse_cache(self) -> exp.Cache: 1899 lazy = self._match_text_seq("LAZY") 1900 self._match(TokenType.TABLE) 1901 table = self._parse_table(schema=True) 1902 1903 options = [] 1904 if self._match_text_seq("OPTIONS"): 1905 self._match_l_paren() 1906 k = self._parse_string() 1907 self._match(TokenType.EQ) 1908 v = self._parse_string() 1909 options = [k, v] 1910 self._match_r_paren() 1911 1912 self._match(TokenType.ALIAS) 1913 return self.expression( 1914 exp.Cache, 1915 this=table, 1916 lazy=lazy, 1917 options=options, 1918 expression=self._parse_select(nested=True), 1919 ) 1920 1921 def _parse_partition(self) -> t.Optional[exp.Partition]: 1922 if not self._match(TokenType.PARTITION): 1923 return None 1924 1925 return self.expression( 1926 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1927 ) 1928 1929 def _parse_value(self) -> exp.Tuple: 1930 if self._match(TokenType.L_PAREN): 1931 expressions = self._parse_csv(self._parse_conjunction) 1932 self._match_r_paren() 1933 return self.expression(exp.Tuple, 
expressions=expressions) 1934 1935 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1936 # https://prestodb.io/docs/current/sql/values.html 1937 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1938 1939 def _parse_select( 1940 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1941 ) -> t.Optional[exp.Expression]: 1942 cte = self._parse_with() 1943 if cte: 1944 this = self._parse_statement() 1945 1946 if not this: 1947 self.raise_error("Failed to parse any statement following CTE") 1948 return cte 1949 1950 if "with" in this.arg_types: 1951 this.set("with", cte) 1952 else: 1953 self.raise_error(f"{this.key} does not support CTE") 1954 this = cte 1955 elif self._match(TokenType.SELECT): 1956 comments = self._prev_comments 1957 1958 hint = self._parse_hint() 1959 all_ = self._match(TokenType.ALL) 1960 distinct = self._match(TokenType.DISTINCT) 1961 1962 kind = ( 1963 self._match(TokenType.ALIAS) 1964 and self._match_texts(("STRUCT", "VALUE")) 1965 and self._prev.text 1966 ) 1967 1968 if distinct: 1969 distinct = self.expression( 1970 exp.Distinct, 1971 on=self._parse_value() if self._match(TokenType.ON) else None, 1972 ) 1973 1974 if all_ and distinct: 1975 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1976 1977 limit = self._parse_limit(top=True) 1978 expressions = self._parse_expressions() 1979 1980 this = self.expression( 1981 exp.Select, 1982 kind=kind, 1983 hint=hint, 1984 distinct=distinct, 1985 expressions=expressions, 1986 limit=limit, 1987 ) 1988 this.comments = comments 1989 1990 into = self._parse_into() 1991 if into: 1992 this.set("into", into) 1993 1994 from_ = self._parse_from() 1995 if from_: 1996 this.set("from", from_) 1997 1998 this = self._parse_query_modifiers(this) 1999 elif (table or nested) and self._match(TokenType.L_PAREN): 2000 if self._match(TokenType.PIVOT): 2001 this = self._parse_simplified_pivot() 2002 elif 
self._match(TokenType.FROM): 2003 this = exp.select("*").from_( 2004 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2005 ) 2006 else: 2007 this = self._parse_table() if table else self._parse_select(nested=True) 2008 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2009 2010 self._match_r_paren() 2011 2012 # We return early here so that the UNION isn't attached to the subquery by the 2013 # following call to _parse_set_operations, but instead becomes the parent node 2014 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2015 elif self._match(TokenType.VALUES): 2016 this = self.expression( 2017 exp.Values, 2018 expressions=self._parse_csv(self._parse_value), 2019 alias=self._parse_table_alias(), 2020 ) 2021 else: 2022 this = None 2023 2024 return self._parse_set_operations(this) 2025 2026 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2027 if not skip_with_token and not self._match(TokenType.WITH): 2028 return None 2029 2030 comments = self._prev_comments 2031 recursive = self._match(TokenType.RECURSIVE) 2032 2033 expressions = [] 2034 while True: 2035 expressions.append(self._parse_cte()) 2036 2037 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2038 break 2039 else: 2040 self._match(TokenType.WITH) 2041 2042 return self.expression( 2043 exp.With, comments=comments, expressions=expressions, recursive=recursive 2044 ) 2045 2046 def _parse_cte(self) -> exp.CTE: 2047 alias = self._parse_table_alias() 2048 if not alias or not alias.this: 2049 self.raise_error("Expected CTE to have alias") 2050 2051 self._match(TokenType.ALIAS) 2052 return self.expression( 2053 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2054 ) 2055 2056 def _parse_table_alias( 2057 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2058 ) -> t.Optional[exp.TableAlias]: 2059 any_token = self._match(TokenType.ALIAS) 2060 alias = ( 2061 
self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2062 or self._parse_string_as_identifier() 2063 ) 2064 2065 index = self._index 2066 if self._match(TokenType.L_PAREN): 2067 columns = self._parse_csv(self._parse_function_parameter) 2068 self._match_r_paren() if columns else self._retreat(index) 2069 else: 2070 columns = None 2071 2072 if not alias and not columns: 2073 return None 2074 2075 return self.expression(exp.TableAlias, this=alias, columns=columns) 2076 2077 def _parse_subquery( 2078 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2079 ) -> t.Optional[exp.Subquery]: 2080 if not this: 2081 return None 2082 2083 return self.expression( 2084 exp.Subquery, 2085 this=this, 2086 pivots=self._parse_pivots(), 2087 alias=self._parse_table_alias() if parse_alias else None, 2088 ) 2089 2090 def _parse_query_modifiers( 2091 self, this: t.Optional[exp.Expression] 2092 ) -> t.Optional[exp.Expression]: 2093 if isinstance(this, self.MODIFIABLES): 2094 for join in iter(self._parse_join, None): 2095 this.append("joins", join) 2096 for lateral in iter(self._parse_lateral, None): 2097 this.append("laterals", lateral) 2098 2099 while True: 2100 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2101 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2102 key, expression = parser(self) 2103 2104 if expression: 2105 this.set(key, expression) 2106 if key == "limit": 2107 offset = expression.args.pop("offset", None) 2108 if offset: 2109 this.set("offset", exp.Offset(expression=offset)) 2110 continue 2111 break 2112 return this 2113 2114 def _parse_hint(self) -> t.Optional[exp.Hint]: 2115 if self._match(TokenType.HINT): 2116 hints = [] 2117 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2118 hints.extend(hint) 2119 2120 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2121 self.raise_error("Expected */ after HINT") 2122 2123 return self.expression(exp.Hint, 
expressions=hints) 2124 2125 return None 2126 2127 def _parse_into(self) -> t.Optional[exp.Into]: 2128 if not self._match(TokenType.INTO): 2129 return None 2130 2131 temp = self._match(TokenType.TEMPORARY) 2132 unlogged = self._match_text_seq("UNLOGGED") 2133 self._match(TokenType.TABLE) 2134 2135 return self.expression( 2136 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2137 ) 2138 2139 def _parse_from( 2140 self, joins: bool = False, skip_from_token: bool = False 2141 ) -> t.Optional[exp.From]: 2142 if not skip_from_token and not self._match(TokenType.FROM): 2143 return None 2144 2145 return self.expression( 2146 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2147 ) 2148 2149 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2150 if not self._match(TokenType.MATCH_RECOGNIZE): 2151 return None 2152 2153 self._match_l_paren() 2154 2155 partition = self._parse_partition_by() 2156 order = self._parse_order() 2157 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2158 2159 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2160 rows = exp.var("ONE ROW PER MATCH") 2161 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2162 text = "ALL ROWS PER MATCH" 2163 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2164 text += f" SHOW EMPTY MATCHES" 2165 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2166 text += f" OMIT EMPTY MATCHES" 2167 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2168 text += f" WITH UNMATCHED ROWS" 2169 rows = exp.var(text) 2170 else: 2171 rows = None 2172 2173 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2174 text = "AFTER MATCH SKIP" 2175 if self._match_text_seq("PAST", "LAST", "ROW"): 2176 text += f" PAST LAST ROW" 2177 elif self._match_text_seq("TO", "NEXT", "ROW"): 2178 text += f" TO NEXT ROW" 2179 elif self._match_text_seq("TO", "FIRST"): 2180 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 2181 elif self._match_text_seq("TO", "LAST"): 2182 text += f" TO LAST {self._advance_any().text}" # type: ignore 2183 after = exp.var(text) 2184 else: 2185 after = None 2186 2187 if self._match_text_seq("PATTERN"): 2188 self._match_l_paren() 2189 2190 if not self._curr: 2191 self.raise_error("Expecting )", self._curr) 2192 2193 paren = 1 2194 start = self._curr 2195 2196 while self._curr and paren > 0: 2197 if self._curr.token_type == TokenType.L_PAREN: 2198 paren += 1 2199 if self._curr.token_type == TokenType.R_PAREN: 2200 paren -= 1 2201 2202 end = self._prev 2203 self._advance() 2204 2205 if paren > 0: 2206 self.raise_error("Expecting )", self._curr) 2207 2208 pattern = exp.var(self._find_sql(start, end)) 2209 else: 2210 pattern = None 2211 2212 define = ( 2213 self._parse_csv( 2214 lambda: self.expression( 2215 exp.Alias, 2216 alias=self._parse_id_var(any_token=True), 2217 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2218 ) 2219 ) 2220 if self._match_text_seq("DEFINE") 2221 else None 2222 ) 2223 2224 self._match_r_paren() 2225 2226 return self.expression( 2227 exp.MatchRecognize, 2228 partition_by=partition, 2229 order=order, 2230 measures=measures, 2231 rows=rows, 2232 after=after, 2233 pattern=pattern, 2234 define=define, 2235 alias=self._parse_table_alias(), 2236 ) 2237 2238 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2239 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2240 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2241 2242 if outer_apply or cross_apply: 2243 this = self._parse_select(table=True) 2244 view = None 2245 outer = not cross_apply 2246 elif self._match(TokenType.LATERAL): 2247 this = self._parse_select(table=True) 2248 view = self._match(TokenType.VIEW) 2249 outer = self._match(TokenType.OUTER) 2250 else: 2251 return None 2252 2253 if not this: 2254 this = ( 2255 self._parse_unnest() 2256 or self._parse_function() 2257 or self._parse_id_var(any_token=False) 2258 ) 2259 
2260 while self._match(TokenType.DOT): 2261 this = exp.Dot( 2262 this=this, 2263 expression=self._parse_function() or self._parse_id_var(any_token=False), 2264 ) 2265 2266 if view: 2267 table = self._parse_id_var(any_token=False) 2268 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2269 table_alias: t.Optional[exp.TableAlias] = self.expression( 2270 exp.TableAlias, this=table, columns=columns 2271 ) 2272 elif isinstance(this, exp.Subquery) and this.alias: 2273 # Ensures parity between the Subquery's and the Lateral's "alias" args 2274 table_alias = this.args["alias"].copy() 2275 else: 2276 table_alias = self._parse_table_alias() 2277 2278 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2279 2280 def _parse_join_parts( 2281 self, 2282 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2283 return ( 2284 self._match_set(self.JOIN_METHODS) and self._prev, 2285 self._match_set(self.JOIN_SIDES) and self._prev, 2286 self._match_set(self.JOIN_KINDS) and self._prev, 2287 ) 2288 2289 def _parse_join( 2290 self, skip_join_token: bool = False, parse_bracket: bool = False 2291 ) -> t.Optional[exp.Join]: 2292 if self._match(TokenType.COMMA): 2293 return self.expression(exp.Join, this=self._parse_table()) 2294 2295 index = self._index 2296 method, side, kind = self._parse_join_parts() 2297 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2298 join = self._match(TokenType.JOIN) 2299 2300 if not skip_join_token and not join: 2301 self._retreat(index) 2302 kind = None 2303 method = None 2304 side = None 2305 2306 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2307 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2308 2309 if not skip_join_token and not join and not outer_apply and not cross_apply: 2310 return None 2311 2312 if outer_apply: 2313 side = Token(TokenType.LEFT, "LEFT") 2314 2315 kwargs: t.Dict[str, 
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        If `index` is given (the index name was already parsed), only the
        ON <table> target is consumed; otherwise the full
        [UNIQUE] [PRIMARY] [AMP] INDEX <name> form is parsed.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # NOTE(review): presumably Teradata AMP — confirm

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints: T-SQL WITH (...) hints or MySQL index hints.

        Returns None when no hints are present.
        """
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    # e.g. USE INDEX FOR JOIN / ORDER BY / GROUP BY
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (catalog, db or table)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name ([catalog.][db.]table) into
        an exp.Table, raising if no table component is found."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: tries LATERAL, UNNEST, VALUES and a
        subquery before falling back to a (qualified) table name, then
        attaches alias, pivots, hints, TABLESAMPLE and optional joins.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            # Column-definition context (e.g. CREATE TABLE): parse the schema.
            return self._parse_schema(this=this)

        # Dialect flag decides whether TABLESAMPLE precedes or follows the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
schema: bool = False, 2449 joins: bool = False, 2450 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2451 parse_bracket: bool = False, 2452 ) -> t.Optional[exp.Expression]: 2453 lateral = self._parse_lateral() 2454 if lateral: 2455 return lateral 2456 2457 unnest = self._parse_unnest() 2458 if unnest: 2459 return unnest 2460 2461 values = self._parse_derived_table_values() 2462 if values: 2463 return values 2464 2465 subquery = self._parse_select(table=True) 2466 if subquery: 2467 if not subquery.args.get("pivots"): 2468 subquery.set("pivots", self._parse_pivots()) 2469 return subquery 2470 2471 bracket = parse_bracket and self._parse_bracket(None) 2472 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2473 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2474 2475 if schema: 2476 return self._parse_schema(this=this) 2477 2478 if self.ALIAS_POST_TABLESAMPLE: 2479 table_sample = self._parse_table_sample() 2480 2481 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2482 if alias: 2483 this.set("alias", alias) 2484 2485 if not this.args.get("pivots"): 2486 this.set("pivots", self._parse_pivots()) 2487 2488 this.set("hints", self._parse_table_hints()) 2489 2490 if not self.ALIAS_POST_TABLESAMPLE: 2491 table_sample = self._parse_table_sample() 2492 2493 if table_sample: 2494 table_sample.set("this", this) 2495 this = table_sample 2496 2497 if joins: 2498 for join in iter(self._parse_join, None): 2499 this.append("joins", join) 2500 2501 return this 2502 2503 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2504 if not self._match(TokenType.UNNEST): 2505 return None 2506 2507 expressions = self._parse_wrapped_csv(self._parse_type) 2508 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2509 2510 alias = self._parse_table_alias() if with_alias else None 2511 2512 if alias and self.UNNEST_COLUMN_ONLY: 2513 if alias.args.get("columns"): 
2514 self.raise_error("Unexpected extra column alias in unnest.") 2515 2516 alias.set("columns", [alias.this]) 2517 alias.set("this", None) 2518 2519 offset = None 2520 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2521 self._match(TokenType.ALIAS) 2522 offset = self._parse_id_var() or exp.to_identifier("offset") 2523 2524 return self.expression( 2525 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2526 ) 2527 2528 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2529 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2530 if not is_derived and not self._match(TokenType.VALUES): 2531 return None 2532 2533 expressions = self._parse_csv(self._parse_value) 2534 alias = self._parse_table_alias() 2535 2536 if is_derived: 2537 self._match_r_paren() 2538 2539 return self.expression( 2540 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2541 ) 2542 2543 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2544 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2545 as_modifier and self._match_text_seq("USING", "SAMPLE") 2546 ): 2547 return None 2548 2549 bucket_numerator = None 2550 bucket_denominator = None 2551 bucket_field = None 2552 percent = None 2553 rows = None 2554 size = None 2555 seed = None 2556 2557 kind = ( 2558 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2559 ) 2560 method = self._parse_var(tokens=(TokenType.ROW,)) 2561 2562 self._match(TokenType.L_PAREN) 2563 2564 num = self._parse_number() 2565 2566 if self._match_text_seq("BUCKET"): 2567 bucket_numerator = self._parse_number() 2568 self._match_text_seq("OUT", "OF") 2569 bucket_denominator = bucket_denominator = self._parse_number() 2570 self._match(TokenType.ON) 2571 bucket_field = self._parse_field() 2572 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2573 percent = num 2574 elif 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Collect consecutive PIVOT/UNPIVOT clauses; None when there are none."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Collect consecutive joins; None when there are none."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement (ON / USING / GROUP BY)."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT ( <aggs> FOR <col> IN (...) ) clause,
        deriving the output column names for PIVOT."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT was an identifier, not the clause; back out.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the pivoted column names: one per (IN value, agg alias)
            # pair, with dialect-controlled name ordering.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default pivot column naming: each aggregation's alias (dialects override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` for callers that ate WHERE."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP,
        CUBE and WITH TOTALS until no more grouping constructs follow."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; the wrapped forms store columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( <set> [, ...] )."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause (e.g. SORT BY / CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term with ASC/DESC and NULLS FIRST/LAST,
        normalizing the implicit null ordering per the dialect's NULL_ORDERING."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query doesn't spell out a null ordering, mark nulls_first
        # wherever the dialect's default would sort nulls first, so the
        # generated SQL is explicit and transpiles consistently.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is set) including the
        `LIMIT offset, count` form, or a FETCH {FIRST|NEXT} ... clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # T-SQL allows TOP (n).
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style LIMIT <offset>, <count>.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) with optional OF <tables> and wait options."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes NOWAIT (True), WAIT <n> (expression) or SKIP LOCKED (False).
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse an expression with an optional alias (a projection)."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level operators (lowest expression precedence)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, <=, >, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE/... via
        RANGE_PARSERS, plus ISNULL/NOTNULL shorthands and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL, or a boolean."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not actually an IS predicate; rewind past IS [NOT].
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST, a (sub)query or value
        list in parens, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing toward the canonical
        INTERVAL '<n>' <unit> form."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split '5 day' into value '5' and unit day.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as two tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (*, /, ...)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to type/column parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, an implicit cast of the form <TYPE> <literal>,
        or a plain column expression, backtracking when the type guess fails."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — a typed literal is a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no literal after it: treat as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse one sized-type argument, e.g. the `25` in VARCHAR(25)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into exp.DataType.

        Handles STRUCT/ARRAY/MAP nesting via (...) or <...>, [] array suffixes,
        ENUM values, sized types, TIMESTAMP time-zone modifiers and INTERVAL
        units. With `check_func`, backtracks when the tokens could instead be
        a function call of the same name.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all; rewind completely.
                self._retreat(index)
                return None

            # A parenthesized form could also be a function call — remember that.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # TYPE[] (possibly repeated) — wrap in ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this was a bracket expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nesting, e.g. ARRAY<INT> or STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a function call: only a following
            # string literal confirms the typed-literal reading.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name[: ]type` as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any trailing column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators: casts (::), dots and brackets,
        re-qualifying Column parts as the dotted path grows."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: old column name becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dot after a column shifts its parts one level: table -> db,
                # db -> catalog, and the new field becomes the column name.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal or a parenthesized expression/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.25`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: lookup table of known function builders; defaults to FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous node instead of a
                known function expression.
            optional_parens: allow parenless functions such as CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and its opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling so it can be generated back.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. MySQL ``_utf8'x'``); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as ``kind.name``."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. ``(x, y) -> ...``) or fall back to a plain expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as an ordinary expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to ``this``."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first (e.g. CREATE TABLE t AS (SELECT ...));
            # any errors from the attempt are discarded and the position restored.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints that follow a column name."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)
        if not kind and not constraints:
            # Neither a type nor constraints followed: treat as a bare identifier.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(options)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed column rather than an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL, or CASESPECIFIC (Teradata)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, e.g. [CONSTRAINT name] NOT NULL."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; without a CONSTRAINT keyword, try unnamed forms."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no CONSTRAINT <name> prefix."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(columns...)]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; ``match=False`` assumes the keyword was already consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (...) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``[...]`` / ``{...}`` suffixes: subscripts, array literals, or struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indexes to the dialect's base offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of a slice (``start:end``) if a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in both function form IF(...) and keyword form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN column])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type [FORMAT fmt]); ``strict`` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with a FORMAT into a temporal type becomes StrToDate/StrToTime
                # so the format can be translated between dialects.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT(...), coalescing args when NULLs concatenate as empty strings."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style aggregation across dialect variants."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up the remaining args as (search, result).
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be matched with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may evaluate to NULL at runtime,
                # so match either by equality or by both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd count of trailing args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] key [:|VALUE] value pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL handling, UNIQUE KEYS and RETURNING options."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...), honoring the dialect's argument order and one-arg default."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols...) AGAINST ('expr' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One column spec in the WITH clause: name, type, optional path, AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls, normalizing needle/haystack order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(substr IN string) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): what we parsed first was the char set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in IgnoreNulls / RespectNulls when those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes of ``this``: FILTER, WITHIN GROUP, OVER (...).

        With ``alias=True``, parse a named-window definition (`name AS (...)`) instead
        of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
4209 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4210 return self._parse_window(window, alias=alias) 4211 4212 return window 4213 4214 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4215 self._match(TokenType.BETWEEN) 4216 4217 return { 4218 "value": ( 4219 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4220 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4221 or self._parse_bitwise() 4222 ), 4223 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4224 } 4225 4226 def _parse_alias( 4227 self, this: t.Optional[exp.Expression], explicit: bool = False 4228 ) -> t.Optional[exp.Expression]: 4229 any_token = self._match(TokenType.ALIAS) 4230 4231 if explicit and not any_token: 4232 return this 4233 4234 if self._match(TokenType.L_PAREN): 4235 aliases = self.expression( 4236 exp.Aliases, 4237 this=this, 4238 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4239 ) 4240 self._match_r_paren(aliases) 4241 return aliases 4242 4243 alias = self._parse_id_var(any_token) 4244 4245 if alias: 4246 return self.expression(exp.Alias, this=this, alias=alias) 4247 4248 return this 4249 4250 def _parse_id_var( 4251 self, 4252 any_token: bool = True, 4253 tokens: t.Optional[t.Collection[TokenType]] = None, 4254 ) -> t.Optional[exp.Expression]: 4255 identifier = self._parse_identifier() 4256 4257 if identifier: 4258 return identifier 4259 4260 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4261 quoted = self._prev.token_type == TokenType.STRING 4262 return exp.Identifier(this=self._prev.text, quoted=quoted) 4263 4264 return None 4265 4266 def _parse_string(self) -> t.Optional[exp.Expression]: 4267 if self._match(TokenType.STRING): 4268 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4269 return self._parse_placeholder() 4270 4271 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4272 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4273 4274 def _parse_number(self) -> t.Optional[exp.Expression]: 4275 if self._match(TokenType.NUMBER): 4276 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4277 return self._parse_placeholder() 4278 4279 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4280 if self._match(TokenType.IDENTIFIER): 4281 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4282 return self._parse_placeholder() 4283 4284 def _parse_var( 4285 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4286 ) -> t.Optional[exp.Expression]: 4287 if ( 4288 (any_token and self._advance_any()) 4289 or self._match(TokenType.VAR) 4290 or (self._match_set(tokens) if tokens else False) 4291 ): 4292 return self.expression(exp.Var, this=self._prev.text) 4293 return self._parse_placeholder() 4294 4295 def _advance_any(self) -> t.Optional[Token]: 4296 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4297 self._advance() 4298 return self._prev 4299 return None 4300 4301 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4302 return self._parse_var() or self._parse_string() 4303 4304 def _parse_null(self) -> t.Optional[exp.Expression]: 4305 if self._match(TokenType.NULL): 4306 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4307 return self._parse_placeholder() 4308 4309 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4310 if self._match(TokenType.TRUE): 4311 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4312 if self._match(TokenType.FALSE): 4313 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4314 return self._parse_placeholder() 4315 4316 def _parse_star(self) -> t.Optional[exp.Expression]: 4317 if self._match(TokenType.STAR): 4318 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4319 return self._parse_placeholder() 4320 4321 def _parse_parameter(self) 
-> exp.Parameter: 4322 wrapped = self._match(TokenType.L_BRACE) 4323 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4324 self._match(TokenType.R_BRACE) 4325 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4326 4327 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4328 if self._match_set(self.PLACEHOLDER_PARSERS): 4329 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4330 if placeholder: 4331 return placeholder 4332 self._advance(-1) 4333 return None 4334 4335 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4336 if not self._match(TokenType.EXCEPT): 4337 return None 4338 if self._match(TokenType.L_PAREN, advance=False): 4339 return self._parse_wrapped_csv(self._parse_column) 4340 return self._parse_csv(self._parse_column) 4341 4342 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4343 if not self._match(TokenType.REPLACE): 4344 return None 4345 if self._match(TokenType.L_PAREN, advance=False): 4346 return self._parse_wrapped_csv(self._parse_expression) 4347 return self._parse_expressions() 4348 4349 def _parse_csv( 4350 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4351 ) -> t.List[t.Optional[exp.Expression]]: 4352 parse_result = parse_method() 4353 items = [parse_result] if parse_result is not None else [] 4354 4355 while self._match(sep): 4356 self._add_comments(parse_result) 4357 parse_result = parse_method() 4358 if parse_result is not None: 4359 items.append(parse_result) 4360 4361 return items 4362 4363 def _parse_tokens( 4364 self, parse_method: t.Callable, expressions: t.Dict 4365 ) -> t.Optional[exp.Expression]: 4366 this = parse_method() 4367 4368 while self._match_set(expressions): 4369 this = self.expression( 4370 expressions[self._prev.token_type], 4371 this=this, 4372 comments=self._prev_comments, 4373 expression=parse_method(), 4374 ) 4375 4376 return this 4377 4378 def _parse_wrapped_id_vars(self, 
optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4379 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4380 4381 def _parse_wrapped_csv( 4382 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4383 ) -> t.List[t.Optional[exp.Expression]]: 4384 return self._parse_wrapped( 4385 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4386 ) 4387 4388 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4389 wrapped = self._match(TokenType.L_PAREN) 4390 if not wrapped and not optional: 4391 self.raise_error("Expecting (") 4392 parse_result = parse_method() 4393 if wrapped: 4394 self._match_r_paren() 4395 return parse_result 4396 4397 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4398 return self._parse_csv(self._parse_expression) 4399 4400 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4401 return self._parse_select() or self._parse_set_operations( 4402 self._parse_expression() if alias else self._parse_conjunction() 4403 ) 4404 4405 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4406 return self._parse_query_modifiers( 4407 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4408 ) 4409 4410 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4411 this = None 4412 if self._match_texts(self.TRANSACTION_KIND): 4413 this = self._prev.text 4414 4415 self._match_texts({"TRANSACTION", "WORK"}) 4416 4417 modes = [] 4418 while True: 4419 mode = [] 4420 while self._match(TokenType.VAR): 4421 mode.append(self._prev.text) 4422 4423 if mode: 4424 modes.append(" ".join(mode)) 4425 if not self._match(TokenType.COMMA): 4426 break 4427 4428 return self.expression(exp.Transaction, this=this, modes=modes) 4429 4430 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4431 chain = None 4432 savepoint = None 4433 is_rollback = 
self._prev.token_type == TokenType.ROLLBACK 4434 4435 self._match_texts({"TRANSACTION", "WORK"}) 4436 4437 if self._match_text_seq("TO"): 4438 self._match_text_seq("SAVEPOINT") 4439 savepoint = self._parse_id_var() 4440 4441 if self._match(TokenType.AND): 4442 chain = not self._match_text_seq("NO") 4443 self._match_text_seq("CHAIN") 4444 4445 if is_rollback: 4446 return self.expression(exp.Rollback, savepoint=savepoint) 4447 4448 return self.expression(exp.Commit, chain=chain) 4449 4450 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4451 if not self._match_text_seq("ADD"): 4452 return None 4453 4454 self._match(TokenType.COLUMN) 4455 exists_column = self._parse_exists(not_=True) 4456 expression = self._parse_column_def(self._parse_field(any_token=True)) 4457 4458 if expression: 4459 expression.set("exists", exists_column) 4460 4461 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4462 if self._match_texts(("FIRST", "AFTER")): 4463 position = self._prev.text 4464 column_position = self.expression( 4465 exp.ColumnPosition, this=self._parse_column(), position=position 4466 ) 4467 expression.set("position", column_position) 4468 4469 return expression 4470 4471 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4472 drop = self._match(TokenType.DROP) and self._parse_drop() 4473 if drop and not isinstance(drop, exp.Command): 4474 drop.set("kind", drop.args.get("kind", "COLUMN")) 4475 return drop 4476 4477 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4478 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4479 return self.expression( 4480 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4481 ) 4482 4483 def _parse_add_constraint(self) -> exp.AddConstraint: 4484 this = None 4485 kind = self._prev.token_type 4486 4487 if kind == TokenType.CONSTRAINT: 4488 this = self._parse_id_var() 4489 4490 if 
self._match_text_seq("CHECK"): 4491 expression = self._parse_wrapped(self._parse_conjunction) 4492 enforced = self._match_text_seq("ENFORCED") 4493 4494 return self.expression( 4495 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4496 ) 4497 4498 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4499 expression = self._parse_foreign_key() 4500 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4501 expression = self._parse_primary_key() 4502 else: 4503 expression = None 4504 4505 return self.expression(exp.AddConstraint, this=this, expression=expression) 4506 4507 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4508 index = self._index - 1 4509 4510 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4511 return self._parse_csv(self._parse_add_constraint) 4512 4513 self._retreat(index) 4514 return self._parse_csv(self._parse_add_column) 4515 4516 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4517 self._match(TokenType.COLUMN) 4518 column = self._parse_field(any_token=True) 4519 4520 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4521 return self.expression(exp.AlterColumn, this=column, drop=True) 4522 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4523 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4524 4525 self._match_text_seq("SET", "DATA") 4526 return self.expression( 4527 exp.AlterColumn, 4528 this=column, 4529 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4530 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4531 using=self._match(TokenType.USING) and self._parse_conjunction(), 4532 ) 4533 4534 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4535 index = self._index - 1 4536 4537 partition_exists = self._parse_exists() 4538 if self._match(TokenType.PARTITION, advance=False): 4539 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 4540 4541 self._retreat(index) 4542 return self._parse_csv(self._parse_drop_column) 4543 4544 def _parse_alter_table_rename(self) -> exp.RenameTable: 4545 self._match_text_seq("TO") 4546 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4547 4548 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4549 start = self._prev 4550 4551 if not self._match(TokenType.TABLE): 4552 return self._parse_as_command(start) 4553 4554 exists = self._parse_exists() 4555 this = self._parse_table(schema=True) 4556 4557 if self._next: 4558 self._advance() 4559 4560 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4561 if parser: 4562 actions = ensure_list(parser(self)) 4563 4564 if not self._curr: 4565 return self.expression( 4566 exp.AlterTable, 4567 this=this, 4568 exists=exists, 4569 actions=actions, 4570 ) 4571 return self._parse_as_command(start) 4572 4573 def _parse_merge(self) -> exp.Merge: 4574 self._match(TokenType.INTO) 4575 target = self._parse_table() 4576 4577 self._match(TokenType.USING) 4578 using = self._parse_table() 4579 4580 self._match(TokenType.ON) 4581 on = self._parse_conjunction() 4582 4583 whens = [] 4584 while self._match(TokenType.WHEN): 4585 matched = not self._match(TokenType.NOT) 4586 self._match_text_seq("MATCHED") 4587 source = ( 4588 False 4589 if self._match_text_seq("BY", "TARGET") 4590 else self._match_text_seq("BY", "SOURCE") 4591 ) 4592 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4593 4594 self._match(TokenType.THEN) 4595 4596 if self._match(TokenType.INSERT): 4597 _this = self._parse_star() 4598 if _this: 4599 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4600 else: 4601 then = self.expression( 4602 exp.Insert, 4603 this=self._parse_value(), 4604 expression=self._match(TokenType.VALUES) and self._parse_value(), 4605 ) 4606 elif self._match(TokenType.UPDATE): 4607 
expressions = self._parse_star() 4608 if expressions: 4609 then = self.expression(exp.Update, expressions=expressions) 4610 else: 4611 then = self.expression( 4612 exp.Update, 4613 expressions=self._match(TokenType.SET) 4614 and self._parse_csv(self._parse_equality), 4615 ) 4616 elif self._match(TokenType.DELETE): 4617 then = self.expression(exp.Var, this=self._prev.text) 4618 else: 4619 then = None 4620 4621 whens.append( 4622 self.expression( 4623 exp.When, 4624 matched=matched, 4625 source=source, 4626 condition=condition, 4627 then=then, 4628 ) 4629 ) 4630 4631 return self.expression( 4632 exp.Merge, 4633 this=target, 4634 using=using, 4635 on=on, 4636 expressions=whens, 4637 ) 4638 4639 def _parse_show(self) -> t.Optional[exp.Expression]: 4640 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4641 if parser: 4642 return parser(self) 4643 self._advance() 4644 return self.expression(exp.Show, this=self._prev.text.upper()) 4645 4646 def _parse_set_item_assignment( 4647 self, kind: t.Optional[str] = None 4648 ) -> t.Optional[exp.Expression]: 4649 index = self._index 4650 4651 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4652 return self._parse_set_transaction(global_=kind == "GLOBAL") 4653 4654 left = self._parse_primary() or self._parse_id_var() 4655 4656 if not self._match_texts(("=", "TO")): 4657 self._retreat(index) 4658 return None 4659 4660 right = self._parse_statement() or self._parse_id_var() 4661 this = self.expression(exp.EQ, this=left, expression=right) 4662 4663 return self.expression(exp.SetItem, this=this, kind=kind) 4664 4665 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4666 self._match_text_seq("TRANSACTION") 4667 characteristics = self._parse_csv( 4668 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4669 ) 4670 return self.expression( 4671 exp.SetItem, 4672 expressions=characteristics, 4673 kind="TRANSACTION", 4674 **{"global": global_}, # type: ignore 
4675 ) 4676 4677 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4678 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4679 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4680 4681 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4682 index = self._index 4683 set_ = self.expression( 4684 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4685 ) 4686 4687 if self._curr: 4688 self._retreat(index) 4689 return self._parse_as_command(self._prev) 4690 4691 return set_ 4692 4693 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4694 for option in options: 4695 if self._match_text_seq(*option.split(" ")): 4696 return exp.var(option) 4697 return None 4698 4699 def _parse_as_command(self, start: Token) -> exp.Command: 4700 while self._curr: 4701 self._advance() 4702 text = self._find_sql(start, self._prev) 4703 size = len(start.text) 4704 return exp.Command(this=text[:size], expression=text[size:]) 4705 4706 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4707 settings = [] 4708 4709 self._match_l_paren() 4710 kind = self._parse_id_var() 4711 4712 if self._match(TokenType.L_PAREN): 4713 while True: 4714 key = self._parse_id_var() 4715 value = self._parse_primary() 4716 4717 if not key and value is None: 4718 break 4719 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4720 self._match(TokenType.R_PAREN) 4721 4722 self._match_r_paren() 4723 4724 return self.expression( 4725 exp.DictProperty, 4726 this=this, 4727 kind=kind.this if kind else None, 4728 settings=settings, 4729 ) 4730 4731 def _parse_dict_range(self, this: str) -> exp.DictRange: 4732 self._match_l_paren() 4733 has_min = self._match_text_seq("MIN") 4734 if has_min: 4735 min = self._parse_var() or self._parse_primary() 4736 self._match_text_seq("MAX") 4737 max = self._parse_var() or self._parse_primary() 4738 else: 4739 
max = self._parse_var() or self._parse_primary() 4740 min = exp.Literal.number(0) 4741 self._match_r_paren() 4742 return self.expression(exp.DictRange, this=this, min=min, max=max) 4743 4744 def _find_parser( 4745 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4746 ) -> t.Optional[t.Callable]: 4747 if not self._curr: 4748 return None 4749 4750 index = self._index 4751 this = [] 4752 while True: 4753 # The current token might be multiple words 4754 curr = self._curr.text.upper() 4755 key = curr.split(" ") 4756 this.append(curr) 4757 4758 self._advance() 4759 result, trie = in_trie(trie, key) 4760 if result == TrieResult.FAILED: 4761 break 4762 4763 if result == TrieResult.EXISTS: 4764 subparser = parsers[" ".join(this)] 4765 return subparser 4766 4767 self._retreat(index) 4768 return None 4769 4770 def _match(self, token_type, advance=True, expression=None): 4771 if not self._curr: 4772 return None 4773 4774 if self._curr.token_type == token_type: 4775 if advance: 4776 self._advance() 4777 self._add_comments(expression) 4778 return True 4779 4780 return None 4781 4782 def _match_set(self, types, advance=True): 4783 if not self._curr: 4784 return None 4785 4786 if self._curr.token_type in types: 4787 if advance: 4788 self._advance() 4789 return True 4790 4791 return None 4792 4793 def _match_pair(self, token_type_a, token_type_b, advance=True): 4794 if not self._curr or not self._next: 4795 return None 4796 4797 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4798 if advance: 4799 self._advance(2) 4800 return True 4801 4802 return None 4803 4804 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4805 if not self._match(TokenType.L_PAREN, expression=expression): 4806 self.raise_error("Expecting (") 4807 4808 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4809 if not self._match(TokenType.R_PAREN, expression=expression): 4810 self.raise_error("Expecting )") 4811 
4812 def _match_texts(self, texts, advance=True): 4813 if self._curr and self._curr.text.upper() in texts: 4814 if advance: 4815 self._advance() 4816 return True 4817 return False 4818 4819 def _match_text_seq(self, *texts, advance=True): 4820 index = self._index 4821 for text in texts: 4822 if self._curr and self._curr.text.upper() == text: 4823 self._advance() 4824 else: 4825 self._retreat(index) 4826 return False 4827 4828 if not advance: 4829 self._retreat(index) 4830 4831 return True 4832 4833 @t.overload 4834 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4835 ... 4836 4837 @t.overload 4838 def _replace_columns_with_dots( 4839 self, this: t.Optional[exp.Expression] 4840 ) -> t.Optional[exp.Expression]: 4841 ... 4842 4843 def _replace_columns_with_dots(self, this): 4844 if isinstance(this, exp.Dot): 4845 exp.replace_children(this, self._replace_columns_with_dots) 4846 elif isinstance(this, exp.Column): 4847 exp.replace_children(this, self._replace_columns_with_dots) 4848 table = this.args.get("table") 4849 this = ( 4850 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4851 ) 4852 4853 return this 4854 4855 def _replace_lambda( 4856 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4857 ) -> t.Optional[exp.Expression]: 4858 if not node: 4859 return node 4860 4861 for column in node.find_all(exp.Column): 4862 if column.parts[0].name in lambda_variables: 4863 dot_or_id = column.to_dot() if column.table else column.this 4864 parent = column.parent 4865 4866 while isinstance(parent, exp.Dot): 4867 if not isinstance(parent.parent, exp.Dot): 4868 parent.replace(dot_or_id) 4869 break 4870 parent = parent.parent 4871 else: 4872 if column is node: 4873 node = dot_or_id 4874 else: 4875 column.replace(dot_or_id) 4876 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
848 def __init__( 849 self, 850 error_level: t.Optional[ErrorLevel] = None, 851 error_message_context: int = 100, 852 max_errors: int = 3, 853 ): 854 self.error_level = error_level or ErrorLevel.IMMEDIATE 855 self.error_message_context = error_message_context 856 self.max_errors = max_errors 857 self.reset()
869 def parse( 870 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 871 ) -> t.List[t.Optional[exp.Expression]]: 872 """ 873 Parses a list of tokens and returns a list of syntax trees, one tree 874 per parsed SQL statement. 875 876 Args: 877 raw_tokens: The list of tokens. 878 sql: The original SQL string, used to produce helpful debug messages. 879 880 Returns: 881 The list of the produced syntax trees. 882 """ 883 return self._parse( 884 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 885 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
887 def parse_into( 888 self, 889 expression_types: exp.IntoType, 890 raw_tokens: t.List[Token], 891 sql: t.Optional[str] = None, 892 ) -> t.List[t.Optional[exp.Expression]]: 893 """ 894 Parses a list of tokens into a given Expression type. If a collection of Expression 895 types is given instead, this method will try to parse the token list into each one 896 of them, stopping at the first for which the parsing succeeds. 897 898 Args: 899 expression_types: The expression type(s) to try and parse the token list into. 900 raw_tokens: The list of tokens. 901 sql: The original SQL string, used to produce helpful debug messages. 902 903 Returns: 904 The target Expression. 905 """ 906 errors = [] 907 for expression_type in ensure_list(expression_types): 908 parser = self.EXPRESSION_PARSERS.get(expression_type) 909 if not parser: 910 raise TypeError(f"No parser registered for {expression_type}") 911 912 try: 913 return self._parse(parser, raw_tokens, sql) 914 except ParseError as e: 915 e.errors[0]["into_expression"] = expression_type 916 errors.append(e) 917 918 raise ParseError( 919 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 920 errors=merge_errors(errors), 921 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
958 def check_errors(self) -> None: 959 """Logs or raises any found errors, depending on the chosen error level setting.""" 960 if self.error_level == ErrorLevel.WARN: 961 for error in self.errors: 962 logger.error(str(error)) 963 elif self.error_level == ErrorLevel.RAISE and self.errors: 964 raise ParseError( 965 concat_messages(self.errors, self.max_errors), 966 errors=merge_errors(self.errors), 967 )
Logs or raises any found errors, depending on the chosen error level setting.
969 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 970 """ 971 Appends an error in the list of recorded errors or raises it, depending on the chosen 972 error level setting. 973 """ 974 token = token or self._curr or self._prev or Token.string("") 975 start = token.start 976 end = token.end + 1 977 start_context = self.sql[max(start - self.error_message_context, 0) : start] 978 highlight = self.sql[start:end] 979 end_context = self.sql[end : end + self.error_message_context] 980 981 error = ParseError.new( 982 f"{message}. Line {token.line}, Col: {token.col}.\n" 983 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 984 description=message, 985 line=token.line, 986 col=token.col, 987 start_context=start_context, 988 highlight=highlight, 989 end_context=end_context, 990 ) 991 992 if self.error_level == ErrorLevel.IMMEDIATE: 993 raise error 994 995 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
997 def expression( 998 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 999 ) -> E: 1000 """ 1001 Creates a new, validated Expression. 1002 1003 Args: 1004 exp_class: The expression class to instantiate. 1005 comments: An optional list of comments to attach to the expression. 1006 kwargs: The arguments to set for the expression along with their respective values. 1007 1008 Returns: 1009 The target expression. 1010 """ 1011 instance = exp_class(**kwargs) 1012 instance.add_comments(comments) if comments else self._add_comments(instance) 1013 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1020 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1021 """ 1022 Validates an Expression, making sure that all its mandatory arguments are set. 1023 1024 Args: 1025 expression: The expression to validate. 1026 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1027 1028 Returns: 1029 The validated expression. 1030 """ 1031 if self.error_level != ErrorLevel.IGNORE: 1032 for error_message in expression.error_messages(args): 1033 self.raise_error(error_message) 1034 1035 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.