sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: 
self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN 
KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 741 TokenType.WHERE: lambda self: ("where", self._parse_where()), 742 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 743 TokenType.HAVING: lambda self: ("having", self._parse_having()), 744 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 745 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 746 
TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 747 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 748 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 749 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 750 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 751 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 752 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 753 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.CLUSTER_BY: lambda self: ( 755 "cluster", 756 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 757 ), 758 TokenType.DISTRIBUTE_BY: lambda self: ( 759 "distribute", 760 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 761 ), 762 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 763 } 764 765 SET_PARSERS = { 766 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 767 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 768 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 769 "TRANSACTION": lambda self: self._parse_set_transaction(), 770 } 771 772 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 773 774 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 775 776 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 777 778 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 779 780 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 781 782 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 783 TRANSACTION_CHARACTERISTICS = { 784 "ISOLATION LEVEL REPEATABLE READ", 785 "ISOLATION LEVEL READ COMMITTED", 786 "ISOLATION LEVEL READ UNCOMMITTED", 787 "ISOLATION LEVEL SERIALIZABLE", 788 "READ WRITE", 789 "READ ONLY", 790 } 791 792 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 793 794 CLONE_KINDS = 
{"TIMESTAMP", "OFFSET", "STATEMENT"} 795 796 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 797 798 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 799 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 800 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 801 802 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 803 804 STRICT_CAST = True 805 806 # A NULL arg in CONCAT yields NULL by default 807 CONCAT_NULL_OUTPUTS_STRING = False 808 809 PREFIXED_PIVOT_COLUMNS = False 810 IDENTIFY_PIVOT_STRINGS = False 811 812 LOG_BASE_FIRST = True 813 LOG_DEFAULTS_TO_LN = False 814 815 __slots__ = ( 816 "error_level", 817 "error_message_context", 818 "max_errors", 819 "sql", 820 "errors", 821 "_tokens", 822 "_index", 823 "_curr", 824 "_next", 825 "_prev", 826 "_prev_comments", 827 ) 828 829 # Autofilled 830 INDEX_OFFSET: int = 0 831 UNNEST_COLUMN_ONLY: bool = False 832 ALIAS_POST_TABLESAMPLE: bool = False 833 STRICT_STRING_CONCAT = False 834 NULL_ORDERING: str = "nulls_are_small" 835 SHOW_TRIE: t.Dict = {} 836 SET_TRIE: t.Dict = {} 837 FORMAT_MAPPING: t.Dict[str, str] = {} 838 FORMAT_TRIE: t.Dict = {} 839 TIME_MAPPING: t.Dict[str, str] = {} 840 TIME_TRIE: t.Dict = {} 841 842 def __init__( 843 self, 844 error_level: t.Optional[ErrorLevel] = None, 845 error_message_context: int = 100, 846 max_errors: int = 3, 847 ): 848 self.error_level = error_level or ErrorLevel.IMMEDIATE 849 self.error_message_context = error_message_context 850 self.max_errors = max_errors 851 self.reset() 852 853 def reset(self): 854 self.sql = "" 855 self.errors = [] 856 self._tokens = [] 857 self._index = 0 858 self._curr = None 859 self._next = None 860 self._prev = None 861 self._prev_comments = None 862 863 def parse( 864 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 865 ) -> t.List[t.Optional[exp.Expression]]: 866 """ 867 Parses a list of tokens and returns a list of syntax trees, one tree 868 per parsed SQL statement. 
869 870 Args: 871 raw_tokens: The list of tokens. 872 sql: The original SQL string, used to produce helpful debug messages. 873 874 Returns: 875 The list of the produced syntax trees. 876 """ 877 return self._parse( 878 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 879 ) 880 881 def parse_into( 882 self, 883 expression_types: exp.IntoType, 884 raw_tokens: t.List[Token], 885 sql: t.Optional[str] = None, 886 ) -> t.List[t.Optional[exp.Expression]]: 887 """ 888 Parses a list of tokens into a given Expression type. If a collection of Expression 889 types is given instead, this method will try to parse the token list into each one 890 of them, stopping at the first for which the parsing succeeds. 891 892 Args: 893 expression_types: The expression type(s) to try and parse the token list into. 894 raw_tokens: The list of tokens. 895 sql: The original SQL string, used to produce helpful debug messages. 896 897 Returns: 898 The target Expression. 899 """ 900 errors = [] 901 for expression_type in ensure_list(expression_types): 902 parser = self.EXPRESSION_PARSERS.get(expression_type) 903 if not parser: 904 raise TypeError(f"No parser registered for {expression_type}") 905 906 try: 907 return self._parse(parser, raw_tokens, sql) 908 except ParseError as e: 909 e.errors[0]["into_expression"] = expression_type 910 errors.append(e) 911 912 raise ParseError( 913 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 914 errors=merge_errors(errors), 915 ) from errors[-1] 916 917 def _parse( 918 self, 919 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 920 raw_tokens: t.List[Token], 921 sql: t.Optional[str] = None, 922 ) -> t.List[t.Optional[exp.Expression]]: 923 self.reset() 924 self.sql = sql or "" 925 926 total = len(raw_tokens) 927 chunks: t.List[t.List[Token]] = [[]] 928 929 for i, token in enumerate(raw_tokens): 930 if token.token_type == TokenType.SEMICOLON: 931 if i < total - 1: 932 chunks.append([]) 933 else: 934 
chunks[-1].append(token) 935 936 expressions = [] 937 938 for tokens in chunks: 939 self._index = -1 940 self._tokens = tokens 941 self._advance() 942 943 expressions.append(parse_method(self)) 944 945 if self._index < len(self._tokens): 946 self.raise_error("Invalid expression / Unexpected token") 947 948 self.check_errors() 949 950 return expressions 951 952 def check_errors(self) -> None: 953 """Logs or raises any found errors, depending on the chosen error level setting.""" 954 if self.error_level == ErrorLevel.WARN: 955 for error in self.errors: 956 logger.error(str(error)) 957 elif self.error_level == ErrorLevel.RAISE and self.errors: 958 raise ParseError( 959 concat_messages(self.errors, self.max_errors), 960 errors=merge_errors(self.errors), 961 ) 962 963 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 964 """ 965 Appends an error in the list of recorded errors or raises it, depending on the chosen 966 error level setting. 967 """ 968 token = token or self._curr or self._prev or Token.string("") 969 start = token.start 970 end = token.end + 1 971 start_context = self.sql[max(start - self.error_message_context, 0) : start] 972 highlight = self.sql[start:end] 973 end_context = self.sql[end : end + self.error_message_context] 974 975 error = ParseError.new( 976 f"{message}. Line {token.line}, Col: {token.col}.\n" 977 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 978 description=message, 979 line=token.line, 980 col=token.col, 981 start_context=start_context, 982 highlight=highlight, 983 end_context=end_context, 984 ) 985 986 if self.error_level == ErrorLevel.IMMEDIATE: 987 raise error 988 989 self.errors.append(error) 990 991 def expression( 992 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 993 ) -> E: 994 """ 995 Creates a new, validated Expression. 996 997 Args: 998 exp_class: The expression class to instantiate. 999 comments: An optional list of comments to attach to the expression. 
1000 kwargs: The arguments to set for the expression along with their respective values. 1001 1002 Returns: 1003 The target expression. 1004 """ 1005 instance = exp_class(**kwargs) 1006 instance.add_comments(comments) if comments else self._add_comments(instance) 1007 return self.validate_expression(instance) 1008 1009 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1010 if expression and self._prev_comments: 1011 expression.add_comments(self._prev_comments) 1012 self._prev_comments = None 1013 1014 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1015 """ 1016 Validates an Expression, making sure that all its mandatory arguments are set. 1017 1018 Args: 1019 expression: The expression to validate. 1020 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1021 1022 Returns: 1023 The validated expression. 1024 """ 1025 if self.error_level != ErrorLevel.IGNORE: 1026 for error_message in expression.error_messages(args): 1027 self.raise_error(error_message) 1028 1029 return expression 1030 1031 def _find_sql(self, start: Token, end: Token) -> str: 1032 return self.sql[start.start : end.end + 1] 1033 1034 def _advance(self, times: int = 1) -> None: 1035 self._index += times 1036 self._curr = seq_get(self._tokens, self._index) 1037 self._next = seq_get(self._tokens, self._index + 1) 1038 1039 if self._index > 0: 1040 self._prev = self._tokens[self._index - 1] 1041 self._prev_comments = self._prev.comments 1042 else: 1043 self._prev = None 1044 self._prev_comments = None 1045 1046 def _retreat(self, index: int) -> None: 1047 if index != self._index: 1048 self._advance(index - self._index) 1049 1050 def _parse_command(self) -> exp.Command: 1051 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1052 1053 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1054 start = self._prev 1055 exists = self._parse_exists() if 
allow_exists else None 1056 1057 self._match(TokenType.ON) 1058 1059 kind = self._match_set(self.CREATABLES) and self._prev 1060 if not kind: 1061 return self._parse_as_command(start) 1062 1063 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1064 this = self._parse_user_defined_function(kind=kind.token_type) 1065 elif kind.token_type == TokenType.TABLE: 1066 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1067 elif kind.token_type == TokenType.COLUMN: 1068 this = self._parse_column() 1069 else: 1070 this = self._parse_id_var() 1071 1072 self._match(TokenType.IS) 1073 1074 return self.expression( 1075 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1076 ) 1077 1078 def _parse_to_table( 1079 self, 1080 ) -> exp.ToTableProperty: 1081 table = self._parse_table_parts(schema=True) 1082 return self.expression(exp.ToTableProperty, this=table) 1083 1084 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1085 def _parse_ttl(self) -> exp.Expression: 1086 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1087 this = self._parse_bitwise() 1088 1089 if self._match_text_seq("DELETE"): 1090 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1091 if self._match_text_seq("RECOMPRESS"): 1092 return self.expression( 1093 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1094 ) 1095 if self._match_text_seq("TO", "DISK"): 1096 return self.expression( 1097 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1098 ) 1099 if self._match_text_seq("TO", "VOLUME"): 1100 return self.expression( 1101 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1102 ) 1103 1104 return this 1105 1106 expressions = self._parse_csv(_parse_ttl_action) 1107 where = self._parse_where() 1108 group = self._parse_group() 1109 1110 aggregates = None 1111 if group and self._match(TokenType.SET): 1112 aggregates = 
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: dispatch on registered statement parsers, then raw
        commands, then fall back to a bare expression / SELECT."""
        if self._curr is None:
            return None

        # Statement keywords (SELECT, INSERT, CREATE, ...) have dedicated parsers.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens registered as commands are kept mostly unparsed.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parse DROP [TEMPORARY] [MATERIALIZED] <kind> ...; unknown kinds fall back
        to a raw Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        # NOTE: keyword arguments are evaluated in order, consuming the trailing
        # tokens (IF EXISTS, name, CASCADE, CONSTRAINTS, PURGE) left to right.
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; truthy only when the full sequence was consumed."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
self._advance() 1174 1175 properties = None 1176 create_token = self._match_set(self.CREATABLES) and self._prev 1177 1178 if not create_token: 1179 # exp.Properties.Location.POST_CREATE 1180 properties = self._parse_properties() 1181 create_token = self._match_set(self.CREATABLES) and self._prev 1182 1183 if not properties or not create_token: 1184 return self._parse_as_command(start) 1185 1186 exists = self._parse_exists(not_=True) 1187 this = None 1188 expression = None 1189 indexes = None 1190 no_schema_binding = None 1191 begin = None 1192 clone = None 1193 1194 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1195 nonlocal properties 1196 if properties and temp_props: 1197 properties.expressions.extend(temp_props.expressions) 1198 elif temp_props: 1199 properties = temp_props 1200 1201 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1202 this = self._parse_user_defined_function(kind=create_token.token_type) 1203 1204 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1205 extend_props(self._parse_properties()) 1206 1207 self._match(TokenType.ALIAS) 1208 begin = self._match(TokenType.BEGIN) 1209 return_ = self._match_text_seq("RETURN") 1210 expression = self._parse_statement() 1211 1212 if return_: 1213 expression = self.expression(exp.Return, this=expression) 1214 elif create_token.token_type == TokenType.INDEX: 1215 this = self._parse_index(index=self._parse_id_var()) 1216 elif create_token.token_type in self.DB_CREATABLES: 1217 table_parts = self._parse_table_parts(schema=True) 1218 1219 # exp.Properties.Location.POST_NAME 1220 self._match(TokenType.COMMA) 1221 extend_props(self._parse_properties(before=True)) 1222 1223 this = self._parse_schema(this=table_parts) 1224 1225 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1226 extend_props(self._parse_properties()) 1227 1228 self._match(TokenType.ALIAS) 1229 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1230 # 
exp.Properties.Location.POST_ALIAS 1231 extend_props(self._parse_properties()) 1232 1233 expression = self._parse_ddl_select() 1234 1235 if create_token.token_type == TokenType.TABLE: 1236 indexes = [] 1237 while True: 1238 index = self._parse_index() 1239 1240 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1241 extend_props(self._parse_properties()) 1242 1243 if not index: 1244 break 1245 else: 1246 self._match(TokenType.COMMA) 1247 indexes.append(index) 1248 elif create_token.token_type == TokenType.VIEW: 1249 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1250 no_schema_binding = True 1251 1252 if self._match_text_seq("CLONE"): 1253 clone = self._parse_table(schema=True) 1254 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1255 clone_kind = ( 1256 self._match(TokenType.L_PAREN) 1257 and self._match_texts(self.CLONE_KINDS) 1258 and self._prev.text.upper() 1259 ) 1260 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1261 self._match(TokenType.R_PAREN) 1262 clone = self.expression( 1263 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1264 ) 1265 1266 return self.expression( 1267 exp.Create, 1268 this=this, 1269 kind=create_token.text, 1270 replace=replace, 1271 unique=unique, 1272 expression=expression, 1273 exists=exists, 1274 properties=properties, 1275 indexes=indexes, 1276 no_schema_binding=no_schema_binding, 1277 begin=begin, 1278 clone=clone, 1279 ) 1280 1281 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1282 # only used for teradata currently 1283 self._match(TokenType.COMMA) 1284 1285 kwargs = { 1286 "no": self._match_text_seq("NO"), 1287 "dual": self._match_text_seq("DUAL"), 1288 "before": self._match_text_seq("BEFORE"), 1289 "default": self._match_text_seq("DEFAULT"), 1290 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1291 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1292 "after": self._match_text_seq("AFTER"), 1293 
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, trying in order: registered PROPERTY_PARSERS,
        DEFAULT CHARACTER SET, COMPOUND SORTKEY, SQL SECURITY, and finally a generic
        key = value assignment. Returns None when nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # key = value style property: pure lookahead (advance=False), tokens are
        # consumed below only if the pair is actually present.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a STORED AS file format, including the INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )
t.Type[E]) -> E: 1346 self._match(TokenType.EQ) 1347 self._match(TokenType.ALIAS) 1348 return self.expression(exp_class, this=self._parse_field()) 1349 1350 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1351 properties = [] 1352 while True: 1353 if before: 1354 prop = self._parse_property_before() 1355 else: 1356 prop = self._parse_property() 1357 1358 if not prop: 1359 break 1360 for p in ensure_list(prop): 1361 properties.append(p) 1362 1363 if properties: 1364 return self.expression(exp.Properties, expressions=properties) 1365 1366 return None 1367 1368 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1369 return self.expression( 1370 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1371 ) 1372 1373 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1374 if self._index >= 2: 1375 pre_volatile_token = self._tokens[self._index - 2] 1376 else: 1377 pre_volatile_token = None 1378 1379 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1380 return exp.VolatileProperty() 1381 1382 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1383 1384 def _parse_with_property( 1385 self, 1386 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1387 self._match(TokenType.WITH) 1388 if self._match(TokenType.L_PAREN, advance=False): 1389 return self._parse_wrapped_csv(self._parse_property) 1390 1391 if self._match_text_seq("JOURNAL"): 1392 return self._parse_withjournaltable() 1393 1394 if self._match_text_seq("DATA"): 1395 return self._parse_withdata(no=False) 1396 elif self._match_text_seq("NO", "DATA"): 1397 return self._parse_withdata(no=True) 1398 1399 if not self._next: 1400 return None 1401 1402 return self._parse_withisolatedloading() 1403 1404 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1405 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 
1406 self._match(TokenType.EQ) 1407 1408 user = self._parse_id_var() 1409 self._match(TokenType.PARAMETER) 1410 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1411 1412 if not user or not host: 1413 return None 1414 1415 return exp.DefinerProperty(this=f"{user}@{host}") 1416 1417 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1418 self._match(TokenType.TABLE) 1419 self._match(TokenType.EQ) 1420 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1421 1422 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1423 return self.expression(exp.LogProperty, no=no) 1424 1425 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1426 return self.expression(exp.JournalProperty, **kwargs) 1427 1428 def _parse_checksum(self) -> exp.ChecksumProperty: 1429 self._match(TokenType.EQ) 1430 1431 on = None 1432 if self._match(TokenType.ON): 1433 on = True 1434 elif self._match_text_seq("OFF"): 1435 on = False 1436 1437 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1438 1439 def _parse_cluster(self) -> exp.Cluster: 1440 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1441 1442 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1443 self._match_text_seq("BY") 1444 1445 self._match_l_paren() 1446 expressions = self._parse_csv(self._parse_column) 1447 self._match_r_paren() 1448 1449 if self._match_text_seq("SORTED", "BY"): 1450 self._match_l_paren() 1451 sorted_by = self._parse_csv(self._parse_ordered) 1452 self._match_r_paren() 1453 else: 1454 sorted_by = None 1455 1456 self._match(TokenType.INTO) 1457 buckets = self._parse_number() 1458 self._match_text_seq("BUCKETS") 1459 1460 return self.expression( 1461 exp.ClusteredByProperty, 1462 expressions=expressions, 1463 sorted_by=sorted_by, 1464 buckets=buckets, 1465 ) 1466 1467 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1468 if 
    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either as `= <number> [PERCENT]` or as a bare
        [NO|DEFAULT] MERGEBLOCKRATIO (flags supplied by the caller)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <size> [BYTES|KBYTES|KILOBYTES]]; the DEFAULT /
        MINIMUM / MAXIMUM flags are passed in by the property dispatcher."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING modifier (appears to follow Teradata's LOCKING request
        modifier — confirm): LOCKING <kind> [<name>] [FOR|IN] <lock type> [OVERRIDE].
        Every component is optional; unmatched parts are stored as None."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks don't name an object; the other kinds are followed by a name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # EXCL is accepted as an abbreviation of EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr, ...>; returns an empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
exp.PartitionedByProperty: 1606 self._match(TokenType.EQ) 1607 return self.expression( 1608 exp.PartitionedByProperty, 1609 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1610 ) 1611 1612 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1613 if self._match_text_seq("AND", "STATISTICS"): 1614 statistics = True 1615 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1616 statistics = False 1617 else: 1618 statistics = None 1619 1620 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1621 1622 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1623 if self._match_text_seq("PRIMARY", "INDEX"): 1624 return exp.NoPrimaryIndexProperty() 1625 return None 1626 1627 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1628 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1629 return exp.OnCommitProperty() 1630 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1631 return exp.OnCommitProperty(delete=True) 1632 return None 1633 1634 def _parse_distkey(self) -> exp.DistKeyProperty: 1635 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1636 1637 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1638 table = self._parse_table(schema=True) 1639 1640 options = [] 1641 while self._match_texts(("INCLUDING", "EXCLUDING")): 1642 this = self._prev.text.upper() 1643 1644 id_var = self._parse_id_var() 1645 if not id_var: 1646 return None 1647 1648 options.append( 1649 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1650 ) 1651 1652 return self.expression(exp.LikeProperty, this=table, expressions=options) 1653 1654 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1655 return self.expression( 1656 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1657 ) 1658 1659 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1660 
    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: RETURNS TABLE<...>, RETURNS TABLE (...), or a
        plain scalar type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> — angle-bracketed struct-style schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # RETURNS TABLE (col type, ...) — parenthesized schema.
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] <table>; kind is any creatable keyword if present."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse the conflict clause of an INSERT: ON CONFLICT ... DO NOTHING/UPDATE
        or ON DUPLICATE KEY ...; returns None when neither form is present."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key columns.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None when RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse FORMAT ... into a row-format property (presumably the leading ROW
        token was consumed by the dispatcher — confirm against PROPERTY_PARSERS)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse a Hive-style LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...
        statement; any other LOAD variant is kept as a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # NOTE: keyword arguments are evaluated in order, so the trailing
            # PARTITION / INPUTFORMAT / SERDE clauses are consumed left to right.
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
using=self._match(TokenType.USING) and self._parse_table(joins=True), 1839 where=self._parse_where(), 1840 returning=returning or self._parse_returning(), 1841 limit=self._parse_limit(), 1842 ) 1843 1844 def _parse_update(self) -> exp.Update: 1845 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1846 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1847 returning = self._parse_returning() 1848 return self.expression( 1849 exp.Update, 1850 **{ # type: ignore 1851 "this": this, 1852 "expressions": expressions, 1853 "from": self._parse_from(joins=True), 1854 "where": self._parse_where(), 1855 "returning": returning or self._parse_returning(), 1856 "limit": self._parse_limit(), 1857 }, 1858 ) 1859 1860 def _parse_uncache(self) -> exp.Uncache: 1861 if not self._match(TokenType.TABLE): 1862 self.raise_error("Expecting TABLE after UNCACHE") 1863 1864 return self.expression( 1865 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1866 ) 1867 1868 def _parse_cache(self) -> exp.Cache: 1869 lazy = self._match_text_seq("LAZY") 1870 self._match(TokenType.TABLE) 1871 table = self._parse_table(schema=True) 1872 1873 options = [] 1874 if self._match_text_seq("OPTIONS"): 1875 self._match_l_paren() 1876 k = self._parse_string() 1877 self._match(TokenType.EQ) 1878 v = self._parse_string() 1879 options = [k, v] 1880 self._match_r_paren() 1881 1882 self._match(TokenType.ALIAS) 1883 return self.expression( 1884 exp.Cache, 1885 this=table, 1886 lazy=lazy, 1887 options=options, 1888 expression=self._parse_select(nested=True), 1889 ) 1890 1891 def _parse_partition(self) -> t.Optional[exp.Partition]: 1892 if not self._match(TokenType.PARTITION): 1893 return None 1894 1895 return self.expression( 1896 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1897 ) 1898 1899 def _parse_value(self) -> exp.Tuple: 1900 if self._match(TokenType.L_PAREN): 1901 expressions = 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a plain SELECT, a
        parenthesized query/table (when *nested* or *table* allows it), or a VALUES
        clause. Returns None when nothing matched."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # Fall back to returning the CTE itself so parsing can continue.
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE (kind is the matched keyword text).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # (FROM ...) — treated as SELECT * FROM ...
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause; returns None when WITH isn't present."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerates CTE lists separated by commas and/or repeated WITH keywords.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> [(<columns>)] AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )
any_token = self._match(TokenType.ALIAS) 2030 alias = ( 2031 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2032 or self._parse_string_as_identifier() 2033 ) 2034 2035 index = self._index 2036 if self._match(TokenType.L_PAREN): 2037 columns = self._parse_csv(self._parse_function_parameter) 2038 self._match_r_paren() if columns else self._retreat(index) 2039 else: 2040 columns = None 2041 2042 if not alias and not columns: 2043 return None 2044 2045 return self.expression(exp.TableAlias, this=alias, columns=columns) 2046 2047 def _parse_subquery( 2048 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2049 ) -> t.Optional[exp.Subquery]: 2050 if not this: 2051 return None 2052 2053 return self.expression( 2054 exp.Subquery, 2055 this=this, 2056 pivots=self._parse_pivots(), 2057 alias=self._parse_table_alias() if parse_alias else None, 2058 ) 2059 2060 def _parse_query_modifiers( 2061 self, this: t.Optional[exp.Expression] 2062 ) -> t.Optional[exp.Expression]: 2063 if isinstance(this, self.MODIFIABLES): 2064 for join in iter(self._parse_join, None): 2065 this.append("joins", join) 2066 for lateral in iter(self._parse_lateral, None): 2067 this.append("laterals", lateral) 2068 2069 while True: 2070 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2071 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2072 key, expression = parser(self) 2073 2074 if expression: 2075 this.set(key, expression) 2076 if key == "limit": 2077 offset = expression.args.pop("offset", None) 2078 if offset: 2079 this.set("offset", exp.Offset(expression=offset)) 2080 continue 2081 break 2082 return this 2083 2084 def _parse_hint(self) -> t.Optional[exp.Hint]: 2085 if self._match(TokenType.HINT): 2086 hints = [] 2087 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2088 hints.extend(hint) 2089 2090 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2091 self.raise_error("Expected */ after HINT") 
2092 2093 return self.expression(exp.Hint, expressions=hints) 2094 2095 return None 2096 2097 def _parse_into(self) -> t.Optional[exp.Into]: 2098 if not self._match(TokenType.INTO): 2099 return None 2100 2101 temp = self._match(TokenType.TEMPORARY) 2102 unlogged = self._match_text_seq("UNLOGGED") 2103 self._match(TokenType.TABLE) 2104 2105 return self.expression( 2106 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2107 ) 2108 2109 def _parse_from( 2110 self, joins: bool = False, skip_from_token: bool = False 2111 ) -> t.Optional[exp.From]: 2112 if not skip_from_token and not self._match(TokenType.FROM): 2113 return None 2114 2115 return self.expression( 2116 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2117 ) 2118 2119 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2120 if not self._match(TokenType.MATCH_RECOGNIZE): 2121 return None 2122 2123 self._match_l_paren() 2124 2125 partition = self._parse_partition_by() 2126 order = self._parse_order() 2127 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2128 2129 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2130 rows = exp.var("ONE ROW PER MATCH") 2131 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2132 text = "ALL ROWS PER MATCH" 2133 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2134 text += f" SHOW EMPTY MATCHES" 2135 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2136 text += f" OMIT EMPTY MATCHES" 2137 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2138 text += f" WITH UNMATCHED ROWS" 2139 rows = exp.var(text) 2140 else: 2141 rows = None 2142 2143 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2144 text = "AFTER MATCH SKIP" 2145 if self._match_text_seq("PAST", "LAST", "ROW"): 2146 text += f" PAST LAST ROW" 2147 elif self._match_text_seq("TO", "NEXT", "ROW"): 2148 text += f" TO NEXT ROW" 2149 elif self._match_text_seq("TO", "FIRST"): 2150 text += f" 
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY followed by a subquery,
        function call, or dotted identifier chain, plus its alias."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: LATERAL over a function or dotted name, e.g.
            # LATERAL schema.udtf(...)
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional join method/side/kind tokens (e.g. NATURAL LEFT
        OUTER) and return the matched tokens as a (method, side, kind) triple."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse one join: comma join, [method][side][kind] JOIN, or
        OUTER/CROSS APPLY, with optional ON / USING condition.

        Args:
            skip_join_token: True if the JOIN keyword was already consumed.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The modifiers we consumed were not followed by JOIN: rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is semantically a LEFT join against the applied table
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Support nested joins whose ON/USING binds to the outer join:
            # `a JOIN b JOIN c ON ... ON ...` — try parsing inner joins first.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition/reference.

        Args:
            index: pre-parsed index identifier (e.g. from CREATE INDEX); when
                given, only the ON <table> part and trailing options remain.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL `WITH (...)` hints or MySQL index hints.
        Returns None when no hints are present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function call,
        identifier, quoted string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name `[catalog.][db.]table`,
        folding any extra leading qualifiers into Dot expressions."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain
        table name with alias, pivots, hints and TABLESAMPLE.

        Args:
            schema: parse the table as a schema target (column defs allowed).
            joins: also consume trailing joins.
            alias_tokens: tokens allowed as the table alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialects differ on whether TABLESAMPLE precedes or follows the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
t.Optional[t.Collection[TokenType]] = None, 2414 ) -> t.Optional[exp.Expression]: 2415 lateral = self._parse_lateral() 2416 if lateral: 2417 return lateral 2418 2419 unnest = self._parse_unnest() 2420 if unnest: 2421 return unnest 2422 2423 values = self._parse_derived_table_values() 2424 if values: 2425 return values 2426 2427 subquery = self._parse_select(table=True) 2428 if subquery: 2429 if not subquery.args.get("pivots"): 2430 subquery.set("pivots", self._parse_pivots()) 2431 return subquery 2432 2433 this: exp.Expression = self._parse_table_parts(schema=schema) 2434 2435 if schema: 2436 return self._parse_schema(this=this) 2437 2438 if self.ALIAS_POST_TABLESAMPLE: 2439 table_sample = self._parse_table_sample() 2440 2441 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2442 if alias: 2443 this.set("alias", alias) 2444 2445 if not this.args.get("pivots"): 2446 this.set("pivots", self._parse_pivots()) 2447 2448 this.set("hints", self._parse_table_hints()) 2449 2450 if not self.ALIAS_POST_TABLESAMPLE: 2451 table_sample = self._parse_table_sample() 2452 2453 if table_sample: 2454 table_sample.set("this", this) 2455 this = table_sample 2456 2457 if joins: 2458 for join in iter(self._parse_join, None): 2459 this.append("joins", join) 2460 2461 return this 2462 2463 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2464 if not self._match(TokenType.UNNEST): 2465 return None 2466 2467 expressions = self._parse_wrapped_csv(self._parse_type) 2468 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2469 2470 alias = self._parse_table_alias() if with_alias else None 2471 2472 if alias and self.UNNEST_COLUMN_ONLY: 2473 if alias.args.get("columns"): 2474 self.raise_error("Unexpected extra column alias in unnest.") 2475 2476 alias.set("columns", [alias.this]) 2477 alias.set("this", None) 2478 2479 offset = None 2480 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2481 
self._match(TokenType.ALIAS) 2482 offset = self._parse_id_var() or exp.to_identifier("offset") 2483 2484 return self.expression( 2485 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2486 ) 2487 2488 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2489 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2490 if not is_derived and not self._match(TokenType.VALUES): 2491 return None 2492 2493 expressions = self._parse_csv(self._parse_value) 2494 alias = self._parse_table_alias() 2495 2496 if is_derived: 2497 self._match_r_paren() 2498 2499 return self.expression( 2500 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2501 ) 2502 2503 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2504 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2505 as_modifier and self._match_text_seq("USING", "SAMPLE") 2506 ): 2507 return None 2508 2509 bucket_numerator = None 2510 bucket_denominator = None 2511 bucket_field = None 2512 percent = None 2513 rows = None 2514 size = None 2515 seed = None 2516 2517 kind = ( 2518 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2519 ) 2520 method = self._parse_var(tokens=(TokenType.ROW,)) 2521 2522 self._match(TokenType.L_PAREN) 2523 2524 num = self._parse_number() 2525 2526 if self._match_text_seq("BUCKET"): 2527 bucket_numerator = self._parse_number() 2528 self._match_text_seq("OUT", "OF") 2529 bucket_denominator = bucket_denominator = self._parse_number() 2530 self._match(TokenType.ON) 2531 bucket_field = self._parse_field() 2532 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2533 percent = num 2534 elif self._match(TokenType.ROWS): 2535 rows = num 2536 else: 2537 size = num 2538 2539 self._match(TokenType.R_PAREN) 2540 2541 if self._match(TokenType.L_PAREN): 2542 method = self._parse_var() 2543 seed = self._match(TokenType.COMMA) and self._parse_number() 
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse zero or more consecutive PIVOT/UNPIVOT clauses."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        """Parse zero or more consecutive joins."""
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT statement:
        PIVOT <table> ON <cols> USING <aggs> [GROUP BY ...]."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT clause
        `(aggs FOR col IN (...)) [alias]`, synthesizing the output column
        names for PIVOT per dialect settings."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT turned out to be an identifier, not a clause.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the generated pivot output column names
            # (aggregation alias x IN-list value), honoring dialect flags.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; dialects may override."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause.

        Args:
            skip_where_token: True if the WHERE keyword was already consumed.
        """
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUP BY ALL, GROUPING SETS,
        [WITH] ROLLUP/CUBE and WITH TOTALS, accumulating parts per kind."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; plain `ROLLUP (...)` stores columns
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `GROUPING SETS ( set [, ...] )`."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause.

        Args:
            skip_having_token: True if the HAVING keyword was already consumed.
        """
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause (window-function filtering)."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for ORDER-BY-like clauses (SORT BY, CLUSTER BY, ...).

        Args:
            exp_class: the expression class to build.
            token: the introducing keyword token.
        """
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST],
        normalizing implicit null ordering per the dialect's NULL_ORDERING."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query doesn't say, derive the effective null ordering from
        # the dialect so transpilation can make it explicit where needed.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [(]n[, offset][)], TOP n, or FETCH FIRST/NEXT n
        [PERCENT] ROW(S) ONLY|WITH TIES; returns `this` unchanged when absent.

        Args:
            this: expression the limit attaches to.
            top: parse T-SQL TOP instead of LIMIT.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL's LIMIT offset, count form
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `OFFSET n [ROW|ROWS]`; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, with optional OF <tables> and wait policy."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT n
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT [ALL|DISTINCT] chains,
        recursing so the operations right-associate on the expression arg."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Bare set operations default to DISTINCT unless ALL is given
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse one projection-level expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, <=, >, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS),
        plus ISNULL/NOTNULL shorthands, NOT-negation and IS predicates."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM,
        NULL, or a boolean literal; rewinds if nothing matches."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all; rewind past IS [NOT]
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the operand of IN: UNNEST(...), a parenthesized subquery or
        expression list, or a bare field.

        Args:
            alias: allow aliased expressions inside the IN list (PIVOT usage).
        """
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `BETWEEN low AND high` (BETWEEN already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `ESCAPE 'c'` suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward the canonical
        INTERVAL '<n>' <unit> form to ease transpilation."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as doubled
        comparison tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM)."""
        return self._parse_tokens(self._parse_factor, self.TERM)
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, then fall through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a `TYPE 'literal'` cast shorthand, or a column.
        Rewinds when a parsed bare type turns out to be an identifier."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2023-01-01' — a type-prefixed literal
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse one type size parameter, e.g. the `10` in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including parameterized, nested (ARRAY/MAP/
        STRUCT, < > or [ ] notation), enum, timestamp-with-zone and interval
        forms. Rewinds and returns None when the tokens aren't a type.

        Args:
            check_func: the type name may actually be a function call; peek
                ahead and back off if a string argument follows.
            schema: parsing within a schema/column-def context.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix: INT[] / INT[][] ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was a subscript, not a type: rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # e.g. DATE(...) is a function call, not the DATE type
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name [:] type [constraints]`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `AT TIME ZONE <expr>` suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then apply column operators (dots,
        casts, brackets) via _parse_column_ops."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    # NOTE(review): _parse_column_ops continues beyond this chunk; only the
    # visible head is documented here, unchanged.
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply chained column operators (::, dots, etc.) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::, dots, brackets, JSON ops, ...) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # e.g. x::int -- the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Other mapped operators take the next raw token as a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifier chain: a.b.c -- previous parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat, .N number,
        or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # e.g. ".5" -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier-like variable."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping to use; defaults to self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous instead of a known function.
            optional_parens: when True, allow paren-less functions (e.g. CURRENT_DATE).

        Returns:
            The parsed function expression (possibly wrapped in a window), or None.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume both the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name: preserve it verbatim as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and, if present, its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'x'); fall back to a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> ...), or fall back to a DISTINCT list / select / expression
        with optional ORDER BY and LIMIT (used for function arguments)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema: column defs / constraints, or a nested SELECT."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse only -- discard its errors and rewind regardless.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type and trailing constraints.

        Returns `this` unchanged when neither a type nor any constraint follows.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint, with either a wrapped value list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) with its sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr) -- a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the remainder of a NOT ... constraint (NULL or CASESPECIFIC)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named via CONSTRAINT) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones go through _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint without a CONSTRAINT <name> prefix, using the matching parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event (e.g. DELETE/UPDATE) is taken verbatim from the next token.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause (target table plus key-constraint options).

        Args:
            match: when True, require the REFERENCES keyword; otherwise assume it
                was already consumed by the caller.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint, its REFERENCES target and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as either a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing [...] or {...}: an index/slice access, array literal or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon, e.g. x[:5] -- a slice with no start.
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Indexing an expression: normalize indices to the dialect's offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to support chained accesses, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into an `exp.Slice` if a colon follows, else return it as-is."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form (CASE x WHEN ...).
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in either function form IF(a, b, c) or statement form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is also accepted as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]) / TRY_CAST.

        Args:
            strict: build an exp.Cast when True, an exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # e.g. Snowflake-style CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with a FORMAT to a temporal type is canonicalized into a
                # STR_TO_DATE / STR_TO_TIME call with a normalized format string.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT's argument list, coalescing NULLs to '' when the dialect requires it."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG (and the WITHIN GROUP variant) into an exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast.

        Args:
            strict: build an exp.Cast when True, an exp.TryCast otherwise.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3863 """ 3864 args = self._parse_csv(self._parse_conjunction) 3865 3866 if len(args) < 3: 3867 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3868 3869 expression, *expressions = args 3870 if not expression: 3871 return None 3872 3873 ifs = [] 3874 for search, result in zip(expressions[::2], expressions[1::2]): 3875 if not search or not result: 3876 return None 3877 3878 if isinstance(search, exp.Literal): 3879 ifs.append( 3880 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3881 ) 3882 elif isinstance(search, exp.Null): 3883 ifs.append( 3884 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3885 ) 3886 else: 3887 cond = exp.or_( 3888 exp.EQ(this=expression.copy(), expression=search), 3889 exp.and_( 3890 exp.Is(this=expression.copy(), expression=exp.Null()), 3891 exp.Is(this=search.copy(), expression=exp.Null()), 3892 copy=False, 3893 ), 3894 copy=False, 3895 ) 3896 ifs.append(exp.If(this=cond, true=result)) 3897 3898 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3899 3900 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3901 self._match_text_seq("KEY") 3902 key = self._parse_field() 3903 self._match(TokenType.COLON) 3904 self._match_text_seq("VALUE") 3905 value = self._parse_field() 3906 3907 if not key and not value: 3908 return None 3909 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3910 3911 def _parse_json_object(self) -> exp.JSONObject: 3912 star = self._parse_star() 3913 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3914 3915 null_handling = None 3916 if self._match_text_seq("NULL", "ON", "NULL"): 3917 null_handling = "NULL ON NULL" 3918 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3919 null_handling = "ABSENT ON NULL" 3920 3921 unique_keys = None 3922 if self._match_text_seq("WITH", "UNIQUE"): 3923 unique_keys = True 3924 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3925 unique_keys = False 3926 3927 self._match_text_seq("KEYS") 3928 3929 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3930 format_json = self._match_text_seq("FORMAT", "JSON") 3931 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3932 3933 return self.expression( 3934 exp.JSONObject, 3935 expressions=expressions, 3936 null_handling=null_handling, 3937 unique_keys=unique_keys, 3938 return_type=return_type, 3939 format_json=format_json, 3940 encoding=encoding, 3941 ) 3942 3943 def _parse_logarithm(self) -> exp.Func: 3944 # Default argument order is base, expression 3945 args = self._parse_csv(self._parse_range) 3946 3947 if len(args) > 1: 3948 if not self.LOG_BASE_FIRST: 3949 args.reverse() 3950 return exp.Log.from_arg_list(args) 3951 3952 return self.expression( 3953 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3954 ) 3955 3956 def _parse_match_against(self) -> exp.MatchAgainst: 3957 expressions = self._parse_csv(self._parse_column) 3958 3959 self._match_text_seq(")", "AGAINST", "(") 3960 3961 this = self._parse_string() 3962 3963 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3964 modifier = "IN NATURAL LANGUAGE MODE" 3965 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3966 modifier = f"{modifier} WITH QUERY EXPANSION" 3967 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3968 modifier = "IN BOOLEAN MODE" 3969 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3970 modifier = "WITH QUERY EXPANSION" 3971 else: 3972 modifier = None 3973 3974 return self.expression( 3975 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3976 ) 3977 3978 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3979 def _parse_open_json(self) -> exp.OpenJSON: 3980 this = self._parse_bitwise() 3981 path = self._match(TokenType.COMMA) and self._parse_string() 3982 3983 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3984 this = self._parse_field(any_token=True) 3985 kind = self._parse_types() 3986 path = self._parse_string() 3987 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3988 3989 return self.expression( 3990 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3991 ) 3992 3993 expressions = None 3994 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3995 self._match_l_paren() 3996 expressions = self._parse_csv(_parse_open_json_column_def) 3997 3998 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3999 4000 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4001 args = self._parse_csv(self._parse_bitwise) 4002 4003 if self._match(TokenType.IN): 4004 return self.expression( 4005 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4006 ) 4007 4008 if haystack_first: 4009 haystack = seq_get(args, 0) 4010 needle = seq_get(args, 1) 4011 else: 4012 needle = seq_get(args, 0) 4013 haystack = seq_get(args, 1) 4014 4015 return self.expression( 4016 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4017 ) 4018 4019 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4020 args = self._parse_csv(self._parse_table) 4021 return exp.JoinHint(this=func_name.upper(), expressions=args) 4022 4023 def _parse_substring(self) -> exp.Substring: 4024 # Postgres supports the form: substring(string [from int] [for int]) 4025 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4026 4027 args = self._parse_csv(self._parse_bitwise) 4028 4029 if self._match(TokenType.FROM): 4030 args.append(self._parse_bitwise()) 4031 if self._match(TokenType.FOR): 4032 args.append(self._parse_bitwise()) 4033 4034 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4035 4036 def _parse_trim(self) -> exp.Trim: 4037 # https://www.w3resource.com/sql/character-functions/trim.php 4038 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffix of an expression: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS, and OVER (...) or a named
        window reference. Returns `this` unchanged when no window syntax
        follows.

        Args:
            this: the expression (usually a function call) the window applies to.
            alias: when True, parse a named-window definition
                (``WINDOW w AS (...)``) instead of an OVER clause.
        """
        # FILTER (WHERE ...) may precede the OVER clause.
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword: there is no window clause at all.
            return this
        else:
            over = self._prev.text.upper()

        # OVER w (a named window reference, no parenthesized spec).
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        # Inside the parentheses: optional base-window name, FIRST/LAST,
        # PARTITION BY, ORDER BY, and a ROWS/RANGE frame spec.
        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame: [BETWEEN] <start spec> [AND <end spec>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
self._match(TokenType.L_PAREN): 4179 aliases = self.expression( 4180 exp.Aliases, 4181 this=this, 4182 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4183 ) 4184 self._match_r_paren(aliases) 4185 return aliases 4186 4187 alias = self._parse_id_var(any_token) 4188 4189 if alias: 4190 return self.expression(exp.Alias, this=this, alias=alias) 4191 4192 return this 4193 4194 def _parse_id_var( 4195 self, 4196 any_token: bool = True, 4197 tokens: t.Optional[t.Collection[TokenType]] = None, 4198 ) -> t.Optional[exp.Expression]: 4199 identifier = self._parse_identifier() 4200 4201 if identifier: 4202 return identifier 4203 4204 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4205 quoted = self._prev.token_type == TokenType.STRING 4206 return exp.Identifier(this=self._prev.text, quoted=quoted) 4207 4208 return None 4209 4210 def _parse_string(self) -> t.Optional[exp.Expression]: 4211 if self._match(TokenType.STRING): 4212 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4213 return self._parse_placeholder() 4214 4215 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4216 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4217 4218 def _parse_number(self) -> t.Optional[exp.Expression]: 4219 if self._match(TokenType.NUMBER): 4220 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4221 return self._parse_placeholder() 4222 4223 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4224 if self._match(TokenType.IDENTIFIER): 4225 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4226 return self._parse_placeholder() 4227 4228 def _parse_var( 4229 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4230 ) -> t.Optional[exp.Expression]: 4231 if ( 4232 (any_token and self._advance_any()) 4233 or self._match(TokenType.VAR) 4234 or (self._match_set(tokens) if tokens else False) 
4235 ): 4236 return self.expression(exp.Var, this=self._prev.text) 4237 return self._parse_placeholder() 4238 4239 def _advance_any(self) -> t.Optional[Token]: 4240 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4241 self._advance() 4242 return self._prev 4243 return None 4244 4245 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4246 return self._parse_var() or self._parse_string() 4247 4248 def _parse_null(self) -> t.Optional[exp.Expression]: 4249 if self._match(TokenType.NULL): 4250 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4251 return None 4252 4253 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4254 if self._match(TokenType.TRUE): 4255 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4256 if self._match(TokenType.FALSE): 4257 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4258 return None 4259 4260 def _parse_star(self) -> t.Optional[exp.Expression]: 4261 if self._match(TokenType.STAR): 4262 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4263 return None 4264 4265 def _parse_parameter(self) -> exp.Parameter: 4266 wrapped = self._match(TokenType.L_BRACE) 4267 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4268 self._match(TokenType.R_BRACE) 4269 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4270 4271 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4272 if self._match_set(self.PLACEHOLDER_PARSERS): 4273 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4274 if placeholder: 4275 return placeholder 4276 self._advance(-1) 4277 return None 4278 4279 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4280 if not self._match(TokenType.EXCEPT): 4281 return None 4282 if self._match(TokenType.L_PAREN, advance=False): 4283 return self._parse_wrapped_csv(self._parse_column) 4284 return self._parse_csv(self._parse_column) 4285 4286 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4287 if not self._match(TokenType.REPLACE): 4288 return None 4289 if self._match(TokenType.L_PAREN, advance=False): 4290 return self._parse_wrapped_csv(self._parse_expression) 4291 return self._parse_expressions() 4292 4293 def _parse_csv( 4294 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4295 ) -> t.List[t.Optional[exp.Expression]]: 4296 parse_result = parse_method() 4297 items = [parse_result] if parse_result is not None else [] 4298 4299 while self._match(sep): 4300 self._add_comments(parse_result) 4301 parse_result = parse_method() 4302 if parse_result is not None: 4303 items.append(parse_result) 4304 4305 return items 4306 4307 def _parse_tokens( 4308 self, parse_method: t.Callable, expressions: t.Dict 4309 ) -> t.Optional[exp.Expression]: 4310 this = parse_method() 4311 4312 while self._match_set(expressions): 4313 this = self.expression( 4314 expressions[self._prev.token_type], 4315 this=this, 4316 comments=self._prev_comments, 4317 expression=parse_method(), 4318 ) 4319 4320 return this 4321 4322 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4323 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4324 4325 def _parse_wrapped_csv( 4326 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4327 ) -> t.List[t.Optional[exp.Expression]]: 4328 return self._parse_wrapped( 4329 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4330 ) 4331 4332 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4333 wrapped = self._match(TokenType.L_PAREN) 4334 if not wrapped and not optional: 4335 self.raise_error("Expecting (") 4336 parse_result = parse_method() 4337 if wrapped: 4338 self._match_r_paren() 4339 return parse_result 4340 4341 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4342 return self._parse_csv(self._parse_expression) 4343 
4344 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4345 return self._parse_select() or self._parse_set_operations( 4346 self._parse_expression() if alias else self._parse_conjunction() 4347 ) 4348 4349 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4350 return self._parse_query_modifiers( 4351 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4352 ) 4353 4354 def _parse_transaction(self) -> exp.Transaction: 4355 this = None 4356 if self._match_texts(self.TRANSACTION_KIND): 4357 this = self._prev.text 4358 4359 self._match_texts({"TRANSACTION", "WORK"}) 4360 4361 modes = [] 4362 while True: 4363 mode = [] 4364 while self._match(TokenType.VAR): 4365 mode.append(self._prev.text) 4366 4367 if mode: 4368 modes.append(" ".join(mode)) 4369 if not self._match(TokenType.COMMA): 4370 break 4371 4372 return self.expression(exp.Transaction, this=this, modes=modes) 4373 4374 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4375 chain = None 4376 savepoint = None 4377 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4378 4379 self._match_texts({"TRANSACTION", "WORK"}) 4380 4381 if self._match_text_seq("TO"): 4382 self._match_text_seq("SAVEPOINT") 4383 savepoint = self._parse_id_var() 4384 4385 if self._match(TokenType.AND): 4386 chain = not self._match_text_seq("NO") 4387 self._match_text_seq("CHAIN") 4388 4389 if is_rollback: 4390 return self.expression(exp.Rollback, savepoint=savepoint) 4391 4392 return self.expression(exp.Commit, chain=chain) 4393 4394 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4395 if not self._match_text_seq("ADD"): 4396 return None 4397 4398 self._match(TokenType.COLUMN) 4399 exists_column = self._parse_exists(not_=True) 4400 expression = self._parse_column_def(self._parse_field(any_token=True)) 4401 4402 if expression: 4403 expression.set("exists", exists_column) 4404 4405 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4406 if self._match_texts(("FIRST", "AFTER")): 4407 position = self._prev.text 4408 column_position = self.expression( 4409 exp.ColumnPosition, this=self._parse_column(), position=position 4410 ) 4411 expression.set("position", column_position) 4412 4413 return expression 4414 4415 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4416 drop = self._match(TokenType.DROP) and self._parse_drop() 4417 if drop and not isinstance(drop, exp.Command): 4418 drop.set("kind", drop.args.get("kind", "COLUMN")) 4419 return drop 4420 4421 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4422 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4423 return self.expression( 4424 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4425 ) 4426 4427 def _parse_add_constraint(self) -> exp.AddConstraint: 4428 this = None 4429 kind = self._prev.token_type 4430 4431 if kind == TokenType.CONSTRAINT: 4432 this = self._parse_id_var() 4433 4434 if self._match_text_seq("CHECK"): 4435 expression = self._parse_wrapped(self._parse_conjunction) 4436 enforced = self._match_text_seq("ENFORCED") 4437 4438 return self.expression( 4439 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4440 ) 4441 4442 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4443 expression = self._parse_foreign_key() 4444 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4445 expression = self._parse_primary_key() 4446 else: 4447 expression = None 4448 4449 return self.expression(exp.AddConstraint, this=this, expression=expression) 4450 4451 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4452 index = self._index - 1 4453 4454 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4455 return self._parse_csv(self._parse_add_constraint) 
4456 4457 self._retreat(index) 4458 return self._parse_csv(self._parse_add_column) 4459 4460 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4461 self._match(TokenType.COLUMN) 4462 column = self._parse_field(any_token=True) 4463 4464 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4465 return self.expression(exp.AlterColumn, this=column, drop=True) 4466 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4467 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4468 4469 self._match_text_seq("SET", "DATA") 4470 return self.expression( 4471 exp.AlterColumn, 4472 this=column, 4473 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4474 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4475 using=self._match(TokenType.USING) and self._parse_conjunction(), 4476 ) 4477 4478 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4479 index = self._index - 1 4480 4481 partition_exists = self._parse_exists() 4482 if self._match(TokenType.PARTITION, advance=False): 4483 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4484 4485 self._retreat(index) 4486 return self._parse_csv(self._parse_drop_column) 4487 4488 def _parse_alter_table_rename(self) -> exp.RenameTable: 4489 self._match_text_seq("TO") 4490 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4491 4492 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4493 start = self._prev 4494 4495 if not self._match(TokenType.TABLE): 4496 return self._parse_as_command(start) 4497 4498 exists = self._parse_exists() 4499 this = self._parse_table(schema=True) 4500 4501 if self._next: 4502 self._advance() 4503 4504 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4505 if parser: 4506 actions = ensure_list(parser(self)) 4507 4508 if not self._curr: 4509 return self.expression( 4510 exp.AlterTable, 4511 this=this, 4512 exists=exists, 4513 
    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE INTO ... USING ... ON ... statement with its
        WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND ...] THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN NOT MATCHED -> matched=False; the MATCHED keyword itself
            # is optional noise here.
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, and False
            # (from the failed match) when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (...).
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE is kept as a bare Var node.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
self.expression(exp.Show, this=self._prev.text.upper()) 4589 4590 def _parse_set_item_assignment( 4591 self, kind: t.Optional[str] = None 4592 ) -> t.Optional[exp.Expression]: 4593 index = self._index 4594 4595 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4596 return self._parse_set_transaction(global_=kind == "GLOBAL") 4597 4598 left = self._parse_primary() or self._parse_id_var() 4599 4600 if not self._match_texts(("=", "TO")): 4601 self._retreat(index) 4602 return None 4603 4604 right = self._parse_statement() or self._parse_id_var() 4605 this = self.expression(exp.EQ, this=left, expression=right) 4606 4607 return self.expression(exp.SetItem, this=this, kind=kind) 4608 4609 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4610 self._match_text_seq("TRANSACTION") 4611 characteristics = self._parse_csv( 4612 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4613 ) 4614 return self.expression( 4615 exp.SetItem, 4616 expressions=characteristics, 4617 kind="TRANSACTION", 4618 **{"global": global_}, # type: ignore 4619 ) 4620 4621 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4622 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4623 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4624 4625 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4626 index = self._index 4627 set_ = self.expression( 4628 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4629 ) 4630 4631 if self._curr: 4632 self._retreat(index) 4633 return self._parse_as_command(self._prev) 4634 4635 return set_ 4636 4637 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4638 for option in options: 4639 if self._match_text_seq(*option.split(" ")): 4640 return exp.var(option) 4641 return None 4642 4643 def _parse_as_command(self, start: Token) -> exp.Command: 4644 while self._curr: 
4645 self._advance() 4646 text = self._find_sql(start, self._prev) 4647 size = len(start.text) 4648 return exp.Command(this=text[:size], expression=text[size:]) 4649 4650 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4651 settings = [] 4652 4653 self._match_l_paren() 4654 kind = self._parse_id_var() 4655 4656 if self._match(TokenType.L_PAREN): 4657 while True: 4658 key = self._parse_id_var() 4659 value = self._parse_primary() 4660 4661 if not key and value is None: 4662 break 4663 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4664 self._match(TokenType.R_PAREN) 4665 4666 self._match_r_paren() 4667 4668 return self.expression( 4669 exp.DictProperty, 4670 this=this, 4671 kind=kind.this if kind else None, 4672 settings=settings, 4673 ) 4674 4675 def _parse_dict_range(self, this: str) -> exp.DictRange: 4676 self._match_l_paren() 4677 has_min = self._match_text_seq("MIN") 4678 if has_min: 4679 min = self._parse_var() or self._parse_primary() 4680 self._match_text_seq("MAX") 4681 max = self._parse_var() or self._parse_primary() 4682 else: 4683 max = self._parse_var() or self._parse_primary() 4684 min = exp.Literal.number(0) 4685 self._match_r_paren() 4686 return self.expression(exp.DictRange, this=this, min=min, max=max) 4687 4688 def _find_parser( 4689 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4690 ) -> t.Optional[t.Callable]: 4691 if not self._curr: 4692 return None 4693 4694 index = self._index 4695 this = [] 4696 while True: 4697 # The current token might be multiple words 4698 curr = self._curr.text.upper() 4699 key = curr.split(" ") 4700 this.append(curr) 4701 4702 self._advance() 4703 result, trie = in_trie(trie, key) 4704 if result == TrieResult.FAILED: 4705 break 4706 4707 if result == TrieResult.EXISTS: 4708 subparser = parsers[" ".join(this)] 4709 return subparser 4710 4711 self._retreat(index) 4712 return None 4713 4714 def _match(self, token_type, advance=True, expression=None): 4715 if not 
self._curr: 4716 return None 4717 4718 if self._curr.token_type == token_type: 4719 if advance: 4720 self._advance() 4721 self._add_comments(expression) 4722 return True 4723 4724 return None 4725 4726 def _match_set(self, types, advance=True): 4727 if not self._curr: 4728 return None 4729 4730 if self._curr.token_type in types: 4731 if advance: 4732 self._advance() 4733 return True 4734 4735 return None 4736 4737 def _match_pair(self, token_type_a, token_type_b, advance=True): 4738 if not self._curr or not self._next: 4739 return None 4740 4741 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4742 if advance: 4743 self._advance(2) 4744 return True 4745 4746 return None 4747 4748 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4749 if not self._match(TokenType.L_PAREN, expression=expression): 4750 self.raise_error("Expecting (") 4751 4752 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4753 if not self._match(TokenType.R_PAREN, expression=expression): 4754 self.raise_error("Expecting )") 4755 4756 def _match_texts(self, texts, advance=True): 4757 if self._curr and self._curr.text.upper() in texts: 4758 if advance: 4759 self._advance() 4760 return True 4761 return False 4762 4763 def _match_text_seq(self, *texts, advance=True): 4764 index = self._index 4765 for text in texts: 4766 if self._curr and self._curr.text.upper() == text: 4767 self._advance() 4768 else: 4769 self._retreat(index) 4770 return False 4771 4772 if not advance: 4773 self._retreat(index) 4774 4775 return True 4776 4777 @t.overload 4778 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4779 ... 4780 4781 @t.overload 4782 def _replace_columns_with_dots( 4783 self, this: t.Optional[exp.Expression] 4784 ) -> t.Optional[exp.Expression]: 4785 ... 
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns inside a lambda body whose root name is a lambda
        parameter, turning them into plain identifiers / dot chains so they
        are not treated as table columns.

        Args:
            node: the parsed lambda body (may be None).
            lambda_variables: names bound by the lambda's parameter list.

        Returns:
            The (possibly replaced) root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                # x.y.z becomes a Dot chain; a bare x becomes its identifier.
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # If the column is nested under Dot nodes, replace the
                # outermost Dot; otherwise (loop never breaks) replace the
                # column itself -- or the root, when the column IS the root.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from alternating key/value arguments.

    A single star argument (``MAP(*)``) yields a StarMap; otherwise even
    positions are keys and odd positions their corresponding values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys, values = [], []
    for position in range(0, len(args), 2):
        keys.append(args[position])
        values.append(args[position + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: 
lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 
TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 
TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: 
self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, 
this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 742 TokenType.WHERE: lambda self: ("where", self._parse_where()), 743 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 744 TokenType.HAVING: 
lambda self: ("having", self._parse_having()), 745 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 746 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 747 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 748 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 749 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 750 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 751 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 752 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 753 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.CLUSTER_BY: lambda self: ( 756 "cluster", 757 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 ), 759 TokenType.DISTRIBUTE_BY: lambda self: ( 760 "distribute", 761 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 762 ), 763 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 764 } 765 766 SET_PARSERS = { 767 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 768 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 769 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 770 "TRANSACTION": lambda self: self._parse_set_transaction(), 771 } 772 773 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 774 775 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 776 777 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 778 779 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 780 781 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 782 783 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 784 TRANSACTION_CHARACTERISTICS = { 785 "ISOLATION LEVEL REPEATABLE READ", 786 "ISOLATION LEVEL READ COMMITTED", 787 "ISOLATION LEVEL READ 
UNCOMMITTED", 788 "ISOLATION LEVEL SERIALIZABLE", 789 "READ WRITE", 790 "READ ONLY", 791 } 792 793 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 794 795 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 796 797 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 798 799 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 800 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 801 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 802 803 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 804 805 STRICT_CAST = True 806 807 # A NULL arg in CONCAT yields NULL by default 808 CONCAT_NULL_OUTPUTS_STRING = False 809 810 PREFIXED_PIVOT_COLUMNS = False 811 IDENTIFY_PIVOT_STRINGS = False 812 813 LOG_BASE_FIRST = True 814 LOG_DEFAULTS_TO_LN = False 815 816 __slots__ = ( 817 "error_level", 818 "error_message_context", 819 "max_errors", 820 "sql", 821 "errors", 822 "_tokens", 823 "_index", 824 "_curr", 825 "_next", 826 "_prev", 827 "_prev_comments", 828 ) 829 830 # Autofilled 831 INDEX_OFFSET: int = 0 832 UNNEST_COLUMN_ONLY: bool = False 833 ALIAS_POST_TABLESAMPLE: bool = False 834 STRICT_STRING_CONCAT = False 835 NULL_ORDERING: str = "nulls_are_small" 836 SHOW_TRIE: t.Dict = {} 837 SET_TRIE: t.Dict = {} 838 FORMAT_MAPPING: t.Dict[str, str] = {} 839 FORMAT_TRIE: t.Dict = {} 840 TIME_MAPPING: t.Dict[str, str] = {} 841 TIME_TRIE: t.Dict = {} 842 843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset() 853 854 def reset(self): 855 self.sql = "" 856 self.errors = [] 857 self._tokens = [] 858 self._index = 0 859 self._curr = None 860 self._next = None 861 self._prev = None 862 self._prev_comments = None 863 864 def parse( 865 self, raw_tokens: 
t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 ) 881 882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 
900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1] 917 918 def _parse( 919 self, 920 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 921 raw_tokens: t.List[Token], 922 sql: t.Optional[str] = None, 923 ) -> t.List[t.Optional[exp.Expression]]: 924 self.reset() 925 self.sql = sql or "" 926 927 total = len(raw_tokens) 928 chunks: t.List[t.List[Token]] = [[]] 929 930 for i, token in enumerate(raw_tokens): 931 if token.token_type == TokenType.SEMICOLON: 932 if i < total - 1: 933 chunks.append([]) 934 else: 935 chunks[-1].append(token) 936 937 expressions = [] 938 939 for tokens in chunks: 940 self._index = -1 941 self._tokens = tokens 942 self._advance() 943 944 expressions.append(parse_method(self)) 945 946 if self._index < len(self._tokens): 947 self.raise_error("Invalid expression / Unexpected token") 948 949 self.check_errors() 950 951 return expressions 952 953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 ) 963 964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 
968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error) 991 992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 
1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression 1031 1032 def _find_sql(self, start: Token, end: Token) -> str: 1033 return self.sql[start.start : end.end + 1] 1034 1035 def _advance(self, times: int = 1) -> None: 1036 self._index += times 1037 self._curr = seq_get(self._tokens, self._index) 1038 self._next = seq_get(self._tokens, self._index + 1) 1039 1040 if self._index > 0: 1041 self._prev = self._tokens[self._index - 1] 1042 self._prev_comments = self._prev.comments 1043 else: 1044 self._prev = None 1045 self._prev_comments = None 1046 1047 def _retreat(self, index: int) -> None: 1048 if index != self._index: 1049 self._advance(index - self._index) 1050 1051 def _parse_command(self) -> exp.Command: 1052 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1053 1054 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1055 start = self._prev 1056 exists = self._parse_exists() if allow_exists else None 1057 1058 self._match(TokenType.ON) 1059 1060 kind = self._match_set(self.CREATABLES) and self._prev 1061 if not kind: 1062 return self._parse_as_command(start) 1063 1064 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1065 this = self._parse_user_defined_function(kind=kind.token_type) 1066 elif kind.token_type == TokenType.TABLE: 1067 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1068 elif kind.token_type == TokenType.COLUMN: 1069 this = self._parse_column() 1070 else: 1071 this = self._parse_id_var() 1072 1073 self._match(TokenType.IS) 1074 1075 return self.expression( 1076 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1077 ) 1078 1079 def _parse_to_table( 1080 self, 1081 ) -> exp.ToTableProperty: 1082 table = self._parse_table_parts(schema=True) 1083 return self.expression(exp.ToTableProperty, 
this=table) 1084 1085 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1086 def _parse_ttl(self) -> exp.Expression: 1087 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1088 this = self._parse_bitwise() 1089 1090 if self._match_text_seq("DELETE"): 1091 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1092 if self._match_text_seq("RECOMPRESS"): 1093 return self.expression( 1094 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1095 ) 1096 if self._match_text_seq("TO", "DISK"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1099 ) 1100 if self._match_text_seq("TO", "VOLUME"): 1101 return self.expression( 1102 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1103 ) 1104 1105 return this 1106 1107 expressions = self._parse_csv(_parse_ttl_action) 1108 where = self._parse_where() 1109 group = self._parse_group() 1110 1111 aggregates = None 1112 if group and self._match(TokenType.SET): 1113 aggregates = self._parse_csv(self._parse_set_item) 1114 1115 return self.expression( 1116 exp.MergeTreeTTL, 1117 expressions=expressions, 1118 where=where, 1119 group=group, 1120 aggregates=aggregates, 1121 ) 1122 1123 def _parse_statement(self) -> t.Optional[exp.Expression]: 1124 if self._curr is None: 1125 return None 1126 1127 if self._match_set(self.STATEMENT_PARSERS): 1128 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1129 1130 if self._match_set(Tokenizer.COMMANDS): 1131 return self._parse_command() 1132 1133 expression = self._parse_expression() 1134 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1135 return self._parse_query_modifiers(expression) 1136 1137 def _parse_drop(self) -> exp.Drop | exp.Command: 1138 start = self._prev 1139 temporary = self._match(TokenType.TEMPORARY) 1140 materialized = self._match_text_seq("MATERIALIZED") 1141 1142 kind = 
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement into exp.Create.

        Falls back to a generic exp.Command (via _parse_as_command) when no
        creatable kind can be identified. Properties may appear in several
        positions (POST_CREATE, POST_NAME, POST_SCHEMA, POST_ALIAS, ...) and
        are merged into a single exp.Properties node via extend_props.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        # Either the statement keyword itself was REPLACE, or "OR REPLACE" follows CREATE.
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # "CREATE TABLE FUNCTION": skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            # Still no creatable kind: treat the whole statement as an opaque command.
            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merge newly parsed properties into the running Properties node (if any).
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        # Snowflake-style CREATE ... CLONE src [AT|BEFORE (kind => expr)].
        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1319 1320 assignment = self._match_pair( 1321 TokenType.VAR, TokenType.EQ, advance=False 1322 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1323 1324 if assignment: 1325 key = self._parse_var_or_string() 1326 self._match(TokenType.EQ) 1327 return self.expression(exp.Property, this=key, value=self._parse_column()) 1328 1329 return None 1330 1331 def _parse_stored(self) -> exp.FileFormatProperty: 1332 self._match(TokenType.ALIAS) 1333 1334 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1335 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1336 1337 return self.expression( 1338 exp.FileFormatProperty, 1339 this=self.expression( 1340 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1341 ) 1342 if input_format or output_format 1343 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1344 ) 1345 1346 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1347 self._match(TokenType.EQ) 1348 self._match(TokenType.ALIAS) 1349 return self.expression(exp_class, this=self._parse_field()) 1350 1351 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1352 properties = [] 1353 while True: 1354 if before: 1355 prop = self._parse_property_before() 1356 else: 1357 prop = self._parse_property() 1358 1359 if not prop: 1360 break 1361 for p in ensure_list(prop): 1362 properties.append(p) 1363 1364 if properties: 1365 return self.expression(exp.Properties, expressions=properties) 1366 1367 return None 1368 1369 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1370 return self.expression( 1371 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1372 ) 1373 1374 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1375 if 
self._index >= 2: 1376 pre_volatile_token = self._tokens[self._index - 2] 1377 else: 1378 pre_volatile_token = None 1379 1380 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1381 return exp.VolatileProperty() 1382 1383 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1384 1385 def _parse_with_property( 1386 self, 1387 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1388 self._match(TokenType.WITH) 1389 if self._match(TokenType.L_PAREN, advance=False): 1390 return self._parse_wrapped_csv(self._parse_property) 1391 1392 if self._match_text_seq("JOURNAL"): 1393 return self._parse_withjournaltable() 1394 1395 if self._match_text_seq("DATA"): 1396 return self._parse_withdata(no=False) 1397 elif self._match_text_seq("NO", "DATA"): 1398 return self._parse_withdata(no=True) 1399 1400 if not self._next: 1401 return None 1402 1403 return self._parse_withisolatedloading() 1404 1405 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1406 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1407 self._match(TokenType.EQ) 1408 1409 user = self._parse_id_var() 1410 self._match(TokenType.PARAMETER) 1411 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1412 1413 if not user or not host: 1414 return None 1415 1416 return exp.DefinerProperty(this=f"{user}@{host}") 1417 1418 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1419 self._match(TokenType.TABLE) 1420 self._match(TokenType.EQ) 1421 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1422 1423 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1424 return self.expression(exp.LogProperty, no=no) 1425 1426 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1427 return self.expression(exp.JournalProperty, **kwargs) 1428 1429 def _parse_checksum(self) -> exp.ChecksumProperty: 1430 self._match(TokenType.EQ) 1431 1432 on = None 
1433 if self._match(TokenType.ON): 1434 on = True 1435 elif self._match_text_seq("OFF"): 1436 on = False 1437 1438 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1439 1440 def _parse_cluster(self) -> exp.Cluster: 1441 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1442 1443 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1444 self._match_text_seq("BY") 1445 1446 self._match_l_paren() 1447 expressions = self._parse_csv(self._parse_column) 1448 self._match_r_paren() 1449 1450 if self._match_text_seq("SORTED", "BY"): 1451 self._match_l_paren() 1452 sorted_by = self._parse_csv(self._parse_ordered) 1453 self._match_r_paren() 1454 else: 1455 sorted_by = None 1456 1457 self._match(TokenType.INTO) 1458 buckets = self._parse_number() 1459 self._match_text_seq("BUCKETS") 1460 1461 return self.expression( 1462 exp.ClusteredByProperty, 1463 expressions=expressions, 1464 sorted_by=sorted_by, 1465 buckets=buckets, 1466 ) 1467 1468 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1469 if not self._match_text_seq("GRANTS"): 1470 self._retreat(self._index - 1) 1471 return None 1472 1473 return self.expression(exp.CopyGrantsProperty) 1474 1475 def _parse_freespace(self) -> exp.FreespaceProperty: 1476 self._match(TokenType.EQ) 1477 return self.expression( 1478 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1479 ) 1480 1481 def _parse_mergeblockratio( 1482 self, no: bool = False, default: bool = False 1483 ) -> exp.MergeBlockRatioProperty: 1484 if self._match(TokenType.EQ): 1485 return self.expression( 1486 exp.MergeBlockRatioProperty, 1487 this=self._parse_number(), 1488 percent=self._match(TokenType.PERCENT), 1489 ) 1490 1491 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1492 1493 def _parse_datablocksize( 1494 self, 1495 default: t.Optional[bool] = None, 1496 minimum: t.Optional[bool] = None, 1497 
maximum: t.Optional[bool] = None, 1498 ) -> exp.DataBlocksizeProperty: 1499 self._match(TokenType.EQ) 1500 size = self._parse_number() 1501 1502 units = None 1503 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1504 units = self._prev.text 1505 1506 return self.expression( 1507 exp.DataBlocksizeProperty, 1508 size=size, 1509 units=units, 1510 default=default, 1511 minimum=minimum, 1512 maximum=maximum, 1513 ) 1514 1515 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1516 self._match(TokenType.EQ) 1517 always = self._match_text_seq("ALWAYS") 1518 manual = self._match_text_seq("MANUAL") 1519 never = self._match_text_seq("NEVER") 1520 default = self._match_text_seq("DEFAULT") 1521 1522 autotemp = None 1523 if self._match_text_seq("AUTOTEMP"): 1524 autotemp = self._parse_schema() 1525 1526 return self.expression( 1527 exp.BlockCompressionProperty, 1528 always=always, 1529 manual=manual, 1530 never=never, 1531 default=default, 1532 autotemp=autotemp, 1533 ) 1534 1535 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1536 no = self._match_text_seq("NO") 1537 concurrent = self._match_text_seq("CONCURRENT") 1538 self._match_text_seq("ISOLATED", "LOADING") 1539 for_all = self._match_text_seq("FOR", "ALL") 1540 for_insert = self._match_text_seq("FOR", "INSERT") 1541 for_none = self._match_text_seq("FOR", "NONE") 1542 return self.expression( 1543 exp.IsolatedLoadingProperty, 1544 no=no, 1545 concurrent=concurrent, 1546 for_all=for_all, 1547 for_insert=for_insert, 1548 for_none=for_none, 1549 ) 1550 1551 def _parse_locking(self) -> exp.LockingProperty: 1552 if self._match(TokenType.TABLE): 1553 kind = "TABLE" 1554 elif self._match(TokenType.VIEW): 1555 kind = "VIEW" 1556 elif self._match(TokenType.ROW): 1557 kind = "ROW" 1558 elif self._match_text_seq("DATABASE"): 1559 kind = "DATABASE" 1560 else: 1561 kind = None 1562 1563 if kind in ("DATABASE", "TABLE", "VIEW"): 1564 this = self._parse_table_parts() 1565 else: 1566 this = None 
1567 1568 if self._match(TokenType.FOR): 1569 for_or_in = "FOR" 1570 elif self._match(TokenType.IN): 1571 for_or_in = "IN" 1572 else: 1573 for_or_in = None 1574 1575 if self._match_text_seq("ACCESS"): 1576 lock_type = "ACCESS" 1577 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1578 lock_type = "EXCLUSIVE" 1579 elif self._match_text_seq("SHARE"): 1580 lock_type = "SHARE" 1581 elif self._match_text_seq("READ"): 1582 lock_type = "READ" 1583 elif self._match_text_seq("WRITE"): 1584 lock_type = "WRITE" 1585 elif self._match_text_seq("CHECKSUM"): 1586 lock_type = "CHECKSUM" 1587 else: 1588 lock_type = None 1589 1590 override = self._match_text_seq("OVERRIDE") 1591 1592 return self.expression( 1593 exp.LockingProperty, 1594 this=this, 1595 kind=kind, 1596 for_or_in=for_or_in, 1597 lock_type=lock_type, 1598 override=override, 1599 ) 1600 1601 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1602 if self._match(TokenType.PARTITION_BY): 1603 return self._parse_csv(self._parse_conjunction) 1604 return [] 1605 1606 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1607 self._match(TokenType.EQ) 1608 return self.expression( 1609 exp.PartitionedByProperty, 1610 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1611 ) 1612 1613 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1614 if self._match_text_seq("AND", "STATISTICS"): 1615 statistics = True 1616 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1617 statistics = False 1618 else: 1619 statistics = None 1620 1621 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1622 1623 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1624 if self._match_text_seq("PRIMARY", "INDEX"): 1625 return exp.NoPrimaryIndexProperty() 1626 return None 1627 1628 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1629 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1630 return exp.OnCommitProperty() 1631 elif 
self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1632 return exp.OnCommitProperty(delete=True) 1633 return None 1634 1635 def _parse_distkey(self) -> exp.DistKeyProperty: 1636 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1637 1638 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1639 table = self._parse_table(schema=True) 1640 1641 options = [] 1642 while self._match_texts(("INCLUDING", "EXCLUDING")): 1643 this = self._prev.text.upper() 1644 1645 id_var = self._parse_id_var() 1646 if not id_var: 1647 return None 1648 1649 options.append( 1650 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1651 ) 1652 1653 return self.expression(exp.LikeProperty, this=table, expressions=options) 1654 1655 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1656 return self.expression( 1657 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1658 ) 1659 1660 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1661 self._match(TokenType.EQ) 1662 return self.expression( 1663 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1664 ) 1665 1666 def _parse_returns(self) -> exp.ReturnsProperty: 1667 value: t.Optional[exp.Expression] 1668 is_table = self._match(TokenType.TABLE) 1669 1670 if is_table: 1671 if self._match(TokenType.LT): 1672 value = self.expression( 1673 exp.Schema, 1674 this="TABLE", 1675 expressions=self._parse_csv(self._parse_struct_types), 1676 ) 1677 if not self._match(TokenType.GT): 1678 self.raise_error("Expecting >") 1679 else: 1680 value = self._parse_schema(exp.var("TABLE")) 1681 else: 1682 value = self._parse_types() 1683 1684 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1685 1686 def _parse_describe(self) -> exp.Describe: 1687 kind = self._match_set(self.CREATABLES) and self._prev.text 1688 this = self._parse_table() 1689 return 
self.expression(exp.Describe, this=this, kind=kind) 1690 1691 def _parse_insert(self) -> exp.Insert: 1692 overwrite = self._match(TokenType.OVERWRITE) 1693 ignore = self._match(TokenType.IGNORE) 1694 local = self._match_text_seq("LOCAL") 1695 alternative = None 1696 1697 if self._match_text_seq("DIRECTORY"): 1698 this: t.Optional[exp.Expression] = self.expression( 1699 exp.Directory, 1700 this=self._parse_var_or_string(), 1701 local=local, 1702 row_format=self._parse_row_format(match_row=True), 1703 ) 1704 else: 1705 if self._match(TokenType.OR): 1706 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1707 1708 self._match(TokenType.INTO) 1709 self._match(TokenType.TABLE) 1710 this = self._parse_table(schema=True) 1711 1712 returning = self._parse_returning() 1713 1714 return self.expression( 1715 exp.Insert, 1716 this=this, 1717 exists=self._parse_exists(), 1718 partition=self._parse_partition(), 1719 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1720 and self._parse_conjunction(), 1721 expression=self._parse_ddl_select(), 1722 conflict=self._parse_on_conflict(), 1723 returning=returning or self._parse_returning(), 1724 overwrite=overwrite, 1725 alternative=alternative, 1726 ignore=ignore, 1727 ) 1728 1729 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1730 conflict = self._match_text_seq("ON", "CONFLICT") 1731 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1732 1733 if not conflict and not duplicate: 1734 return None 1735 1736 nothing = None 1737 expressions = None 1738 key = None 1739 constraint = None 1740 1741 if conflict: 1742 if self._match_text_seq("ON", "CONSTRAINT"): 1743 constraint = self._parse_id_var() 1744 else: 1745 key = self._parse_csv(self._parse_value) 1746 1747 self._match_text_seq("DO") 1748 if self._match_text_seq("NOTHING"): 1749 nothing = True 1750 else: 1751 self._match(TokenType.UPDATE) 1752 self._match(TokenType.SET) 1753 expressions = self._parse_csv(self._parse_equality) 
1754 1755 return self.expression( 1756 exp.OnConflict, 1757 duplicate=duplicate, 1758 expressions=expressions, 1759 nothing=nothing, 1760 key=key, 1761 constraint=constraint, 1762 ) 1763 1764 def _parse_returning(self) -> t.Optional[exp.Returning]: 1765 if not self._match(TokenType.RETURNING): 1766 return None 1767 return self.expression( 1768 exp.Returning, 1769 expressions=self._parse_csv(self._parse_expression), 1770 into=self._match(TokenType.INTO) and self._parse_table_part(), 1771 ) 1772 1773 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1774 if not self._match(TokenType.FORMAT): 1775 return None 1776 return self._parse_row_format() 1777 1778 def _parse_row_format( 1779 self, match_row: bool = False 1780 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1781 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1782 return None 1783 1784 if self._match_text_seq("SERDE"): 1785 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1786 1787 self._match_text_seq("DELIMITED") 1788 1789 kwargs = {} 1790 1791 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1792 kwargs["fields"] = self._parse_string() 1793 if self._match_text_seq("ESCAPED", "BY"): 1794 kwargs["escaped"] = self._parse_string() 1795 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1796 kwargs["collection_items"] = self._parse_string() 1797 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1798 kwargs["map_keys"] = self._parse_string() 1799 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1800 kwargs["lines"] = self._parse_string() 1801 if self._match_text_seq("NULL", "DEFINED", "AS"): 1802 kwargs["null"] = self._parse_string() 1803 1804 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1805 1806 def _parse_load(self) -> exp.LoadData | exp.Command: 1807 if self._match_text_seq("DATA"): 1808 local = 
self._match_text_seq("LOCAL") 1809 self._match_text_seq("INPATH") 1810 inpath = self._parse_string() 1811 overwrite = self._match(TokenType.OVERWRITE) 1812 self._match_pair(TokenType.INTO, TokenType.TABLE) 1813 1814 return self.expression( 1815 exp.LoadData, 1816 this=self._parse_table(schema=True), 1817 local=local, 1818 overwrite=overwrite, 1819 inpath=inpath, 1820 partition=self._parse_partition(), 1821 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1822 serde=self._match_text_seq("SERDE") and self._parse_string(), 1823 ) 1824 return self._parse_as_command(self._prev) 1825 1826 def _parse_delete(self) -> exp.Delete: 1827 # This handles MySQL's "Multiple-Table Syntax" 1828 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1829 tables = None 1830 if not self._match(TokenType.FROM, advance=False): 1831 tables = self._parse_csv(self._parse_table) or None 1832 1833 returning = self._parse_returning() 1834 1835 return self.expression( 1836 exp.Delete, 1837 tables=tables, 1838 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1839 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1840 where=self._parse_where(), 1841 returning=returning or self._parse_returning(), 1842 limit=self._parse_limit(), 1843 ) 1844 1845 def _parse_update(self) -> exp.Update: 1846 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1847 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1848 returning = self._parse_returning() 1849 return self.expression( 1850 exp.Update, 1851 **{ # type: ignore 1852 "this": this, 1853 "expressions": expressions, 1854 "from": self._parse_from(joins=True), 1855 "where": self._parse_where(), 1856 "returning": returning or self._parse_returning(), 1857 "limit": self._parse_limit(), 1858 }, 1859 ) 1860 1861 def _parse_uncache(self) -> exp.Uncache: 1862 if not self._match(TokenType.TABLE): 1863 self.raise_error("Expecting TABLE after UNCACHE") 1864 1865 
return self.expression( 1866 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1867 ) 1868 1869 def _parse_cache(self) -> exp.Cache: 1870 lazy = self._match_text_seq("LAZY") 1871 self._match(TokenType.TABLE) 1872 table = self._parse_table(schema=True) 1873 1874 options = [] 1875 if self._match_text_seq("OPTIONS"): 1876 self._match_l_paren() 1877 k = self._parse_string() 1878 self._match(TokenType.EQ) 1879 v = self._parse_string() 1880 options = [k, v] 1881 self._match_r_paren() 1882 1883 self._match(TokenType.ALIAS) 1884 return self.expression( 1885 exp.Cache, 1886 this=table, 1887 lazy=lazy, 1888 options=options, 1889 expression=self._parse_select(nested=True), 1890 ) 1891 1892 def _parse_partition(self) -> t.Optional[exp.Partition]: 1893 if not self._match(TokenType.PARTITION): 1894 return None 1895 1896 return self.expression( 1897 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1898 ) 1899 1900 def _parse_value(self) -> exp.Tuple: 1901 if self._match(TokenType.L_PAREN): 1902 expressions = self._parse_csv(self._parse_conjunction) 1903 self._match_r_paren() 1904 return self.expression(exp.Tuple, expressions=expressions) 1905 1906 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statement, plain SELECT,
        parenthesized subquery (when nested/table is set), or VALUES.

        Args:
            nested: allow a parenthesized nested select at this position.
            table: allow a table reference inside parentheses instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (wrapped in set operations if any follow),
            or None when nothing at the current position matches.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery-style SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # (FROM t) shorthand: treated as SELECT * FROM t.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
self._index 2037 if self._match(TokenType.L_PAREN): 2038 columns = self._parse_csv(self._parse_function_parameter) 2039 self._match_r_paren() if columns else self._retreat(index) 2040 else: 2041 columns = None 2042 2043 if not alias and not columns: 2044 return None 2045 2046 return self.expression(exp.TableAlias, this=alias, columns=columns) 2047 2048 def _parse_subquery( 2049 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2050 ) -> t.Optional[exp.Subquery]: 2051 if not this: 2052 return None 2053 2054 return self.expression( 2055 exp.Subquery, 2056 this=this, 2057 pivots=self._parse_pivots(), 2058 alias=self._parse_table_alias() if parse_alias else None, 2059 ) 2060 2061 def _parse_query_modifiers( 2062 self, this: t.Optional[exp.Expression] 2063 ) -> t.Optional[exp.Expression]: 2064 if isinstance(this, self.MODIFIABLES): 2065 for join in iter(self._parse_join, None): 2066 this.append("joins", join) 2067 for lateral in iter(self._parse_lateral, None): 2068 this.append("laterals", lateral) 2069 2070 while True: 2071 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2072 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2073 key, expression = parser(self) 2074 2075 if expression: 2076 this.set(key, expression) 2077 if key == "limit": 2078 offset = expression.args.pop("offset", None) 2079 if offset: 2080 this.set("offset", exp.Offset(expression=offset)) 2081 continue 2082 break 2083 return this 2084 2085 def _parse_hint(self) -> t.Optional[exp.Hint]: 2086 if self._match(TokenType.HINT): 2087 hints = [] 2088 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2089 hints.extend(hint) 2090 2091 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2092 self.raise_error("Expected */ after HINT") 2093 2094 return self.expression(exp.Hint, expressions=hints) 2095 2096 return None 2097 2098 def _parse_into(self) -> t.Optional[exp.Into]: 2099 if not self._match(TokenType.INTO): 2100 return None 2101 2102 temp = 
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause into exp.MatchRecognize.

        Returns None when the current token is not MATCH_RECOGNIZE. The
        PATTERN body is captured verbatim (as raw SQL text) rather than
        parsed, by scanning tokens until its parentheses balance.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        # Rows-per-match clause: stored as a raw variable expression.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        # AFTER MATCH SKIP clause, also kept as raw text.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan tokens, tracking paren depth, until the PATTERN's opening
            # paren is balanced; [start, end] delimits the raw pattern text.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        # DEFINE var AS condition [, ...]
        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Consume the optional (method, side, kind) prefix of a JOIN, e.g. HASH LEFT OUTER.
        # Each `... and self._prev` yields the matched token, or a falsy value when absent.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse a join clause (including comma joins and OUTER/CROSS APPLY), or None."""
        # A bare comma between tables is treated as an implicit join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed the modifiers: rewind and forget them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Speculatively parse nested joins so that `a JOIN b JOIN c ON ...`
            # attaches the condition at the right level; rewind if it doesn't pan out.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; with `index` given, parse the ON <table> form instead."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) or MySQL index table hints, returning None when absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # A single dotted component of a table name; functions are disallowed in
        # schema position (e.g. CREATE TABLE targets).
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name (catalog.db.table, with extra dots nested)."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift qualifiers left: previous table becomes db, previous db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: LATERAL, UNNEST, VALUES, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects (e.g. Hive) place TABLESAMPLE before the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            # iter(..., None) keeps consuming joins until _parse_join returns None.
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) with optional WITH ORDINALITY, alias and WITH OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects (e.g. BigQuery) the alias names the column,
            # not the table, so move it into the "columns" slot.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
2487 ) 2488 2489 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2490 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2491 if not is_derived and not self._match(TokenType.VALUES): 2492 return None 2493 2494 expressions = self._parse_csv(self._parse_value) 2495 alias = self._parse_table_alias() 2496 2497 if is_derived: 2498 self._match_r_paren() 2499 2500 return self.expression( 2501 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2502 ) 2503 2504 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2505 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2506 as_modifier and self._match_text_seq("USING", "SAMPLE") 2507 ): 2508 return None 2509 2510 bucket_numerator = None 2511 bucket_denominator = None 2512 bucket_field = None 2513 percent = None 2514 rows = None 2515 size = None 2516 seed = None 2517 2518 kind = ( 2519 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2520 ) 2521 method = self._parse_var(tokens=(TokenType.ROW,)) 2522 2523 self._match(TokenType.L_PAREN) 2524 2525 num = self._parse_number() 2526 2527 if self._match_text_seq("BUCKET"): 2528 bucket_numerator = self._parse_number() 2529 self._match_text_seq("OUT", "OF") 2530 bucket_denominator = bucket_denominator = self._parse_number() 2531 self._match(TokenType.ON) 2532 bucket_field = self._parse_field() 2533 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2534 percent = num 2535 elif self._match(TokenType.ROWS): 2536 rows = num 2537 else: 2538 size = num 2539 2540 self._match(TokenType.R_PAREN) 2541 2542 if self._match(TokenType.L_PAREN): 2543 method = self._parse_var() 2544 seed = self._match(TokenType.COMMA) and self._parse_number() 2545 self._match_r_paren() 2546 elif self._match_texts(("SEED", "REPEATABLE")): 2547 seed = self._parse_wrapped(self._parse_number) 2548 2549 return self.expression( 2550 exp.TableSample, 2551 method=method, 2552 
bucket_numerator=bucket_numerator, 2553 bucket_denominator=bucket_denominator, 2554 bucket_field=bucket_field, 2555 percent=percent, 2556 rows=rows, 2557 size=size, 2558 seed=seed, 2559 kind=kind, 2560 ) 2561 2562 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2563 return list(iter(self._parse_pivot, None)) or None 2564 2565 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2566 return list(iter(self._parse_join, None)) or None 2567 2568 # https://duckdb.org/docs/sql/statements/pivot 2569 def _parse_simplified_pivot(self) -> exp.Pivot: 2570 def _parse_on() -> t.Optional[exp.Expression]: 2571 this = self._parse_bitwise() 2572 return self._parse_in(this) if self._match(TokenType.IN) else this 2573 2574 this = self._parse_table() 2575 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2576 using = self._match(TokenType.USING) and self._parse_csv( 2577 lambda: self._parse_alias(self._parse_function()) 2578 ) 2579 group = self._parse_group() 2580 return self.expression( 2581 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2582 ) 2583 2584 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2585 index = self._index 2586 2587 if self._match(TokenType.PIVOT): 2588 unpivot = False 2589 elif self._match(TokenType.UNPIVOT): 2590 unpivot = True 2591 else: 2592 return None 2593 2594 expressions = [] 2595 field = None 2596 2597 if not self._match(TokenType.L_PAREN): 2598 self._retreat(index) 2599 return None 2600 2601 if unpivot: 2602 expressions = self._parse_csv(self._parse_column) 2603 else: 2604 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2605 2606 if not expressions: 2607 self.raise_error("Failed to parse PIVOT's aggregation list") 2608 2609 if not self._match(TokenType.FOR): 2610 self.raise_error("Expecting FOR") 2611 2612 value = self._parse_column() 2613 2614 if not self._match(TokenType.IN): 2615 self.raise_error("Expecting IN") 2616 2617 field = self._parse_in(value, 
            alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names of the PIVOT from the aggregation
            # aliases and the IN-list values.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default naming: use each aggregation's alias; dialects may override.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, or None when the WHERE keyword is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE use the bare keyword form (value True);
            # otherwise ROLLUP/CUBE take a parenthesized column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when no ORDER BY follows."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) ->
 t.Optional[E]:
        # Shared parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses.
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST handling."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the query doesn't state a null ordering, derive the dialect's
        # implicit one so it can be transpiled explicitly.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`), and FETCH FIRST/NEXT clauses."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            # MySQL-style `LIMIT offset, count`.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE clauses."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in UNION/EXCEPT/INTERSECT nodes while set operators follow."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not
 self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # Top of the expression precedence chain: conjunction plus optional alias.
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE etc., ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all: rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse an IN predicate: IN UNNEST(...), IN (list-or-subquery) or IN field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery goes in the "query" slot rather than "expressions".
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low,
 high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing to the INTERVAL '<n>' <unit> form."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split "<n> <unit>" strings into their canonical components.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as doubled < / > tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>`, or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no following literal: it was a column after all.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested, parameterized and array forms."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Could not parse the parameter list; rewind and let another rule try.
                self._retreat(index)
                return None

            maybe_func = True

        # Postgres-style `type[]` array suffixes, possibly repeated.
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket style nested types, e.g. ARRAY<INT> / STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # `type(...)` followed by a string is a function call, not a type; probe and rewind.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        # One `name: type` (or bare type) member of a STRUCT<...> definition.
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators: brackets, :: casts and dotted access."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: the previous column parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse literals, implicit string concatenation, and parenthesized expressions."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (known, no-paren or anonymous), or None."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # skip the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducers: _utf8'abc'.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(x, y) -> ...` / `x -> ...`) or fall back to an expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse a regular argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list, or return `this` unchanged."""
        index = self._index

        if not self.errors:
            # Probe for a nested SELECT first; any errors it produces are discarded.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this,
kind=kind, constraints=constraints) 3436 3437 def _parse_auto_increment( 3438 self, 3439 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3440 start = None 3441 increment = None 3442 3443 if self._match(TokenType.L_PAREN, advance=False): 3444 args = self._parse_wrapped_csv(self._parse_bitwise) 3445 start = seq_get(args, 0) 3446 increment = seq_get(args, 1) 3447 elif self._match_text_seq("START"): 3448 start = self._parse_bitwise() 3449 self._match_text_seq("INCREMENT") 3450 increment = self._parse_bitwise() 3451 3452 if start and increment: 3453 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3454 3455 return exp.AutoIncrementColumnConstraint() 3456 3457 def _parse_compress(self) -> exp.CompressColumnConstraint: 3458 if self._match(TokenType.L_PAREN, advance=False): 3459 return self.expression( 3460 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3461 ) 3462 3463 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3464 3465 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3466 if self._match_text_seq("BY", "DEFAULT"): 3467 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3468 this = self.expression( 3469 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3470 ) 3471 else: 3472 self._match_text_seq("ALWAYS") 3473 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3474 3475 self._match(TokenType.ALIAS) 3476 identity = self._match_text_seq("IDENTITY") 3477 3478 if self._match(TokenType.L_PAREN): 3479 if self._match_text_seq("START", "WITH"): 3480 this.set("start", self._parse_bitwise()) 3481 if self._match_text_seq("INCREMENT", "BY"): 3482 this.set("increment", self._parse_bitwise()) 3483 if self._match_text_seq("MINVALUE"): 3484 this.set("minvalue", self._parse_bitwise()) 3485 if self._match_text_seq("MAXVALUE"): 3486 this.set("maxvalue", 
self._parse_bitwise()) 3487 3488 if self._match_text_seq("CYCLE"): 3489 this.set("cycle", True) 3490 elif self._match_text_seq("NO", "CYCLE"): 3491 this.set("cycle", False) 3492 3493 if not identity: 3494 this.set("expression", self._parse_bitwise()) 3495 3496 self._match_r_paren() 3497 3498 return this 3499 3500 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3501 self._match_text_seq("LENGTH") 3502 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3503 3504 def _parse_not_constraint( 3505 self, 3506 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3507 if self._match_text_seq("NULL"): 3508 return self.expression(exp.NotNullColumnConstraint) 3509 if self._match_text_seq("CASESPECIFIC"): 3510 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3511 return None 3512 3513 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3514 if self._match(TokenType.CONSTRAINT): 3515 this = self._parse_id_var() 3516 else: 3517 this = None 3518 3519 if self._match_texts(self.CONSTRAINT_PARSERS): 3520 return self.expression( 3521 exp.ColumnConstraint, 3522 this=this, 3523 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3524 ) 3525 3526 return this 3527 3528 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3529 if not self._match(TokenType.CONSTRAINT): 3530 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3531 3532 this = self._parse_id_var() 3533 expressions = [] 3534 3535 while True: 3536 constraint = self._parse_unnamed_constraint() or self._parse_function() 3537 if not constraint: 3538 break 3539 expressions.append(constraint) 3540 3541 return self.expression(exp.Constraint, this=this, expressions=expressions) 3542 3543 def _parse_unnamed_constraint( 3544 self, constraints: t.Optional[t.Collection[str]] = None 3545 ) -> t.Optional[exp.Expression]: 3546 if not self._match_texts(constraints or 
self.CONSTRAINT_PARSERS): 3547 return None 3548 3549 constraint = self._prev.text.upper() 3550 if constraint not in self.CONSTRAINT_PARSERS: 3551 self.raise_error(f"No parser found for schema constraint {constraint}.") 3552 3553 return self.CONSTRAINT_PARSERS[constraint](self) 3554 3555 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3556 self._match_text_seq("KEY") 3557 return self.expression( 3558 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3559 ) 3560 3561 def _parse_key_constraint_options(self) -> t.List[str]: 3562 options = [] 3563 while True: 3564 if not self._curr: 3565 break 3566 3567 if self._match(TokenType.ON): 3568 action = None 3569 on = self._advance_any() and self._prev.text 3570 3571 if self._match_text_seq("NO", "ACTION"): 3572 action = "NO ACTION" 3573 elif self._match_text_seq("CASCADE"): 3574 action = "CASCADE" 3575 elif self._match_pair(TokenType.SET, TokenType.NULL): 3576 action = "SET NULL" 3577 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3578 action = "SET DEFAULT" 3579 else: 3580 self.raise_error("Invalid key constraint") 3581 3582 options.append(f"ON {on} {action}") 3583 elif self._match_text_seq("NOT", "ENFORCED"): 3584 options.append("NOT ENFORCED") 3585 elif self._match_text_seq("DEFERRABLE"): 3586 options.append("DEFERRABLE") 3587 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3588 options.append("INITIALLY DEFERRED") 3589 elif self._match_text_seq("NORELY"): 3590 options.append("NORELY") 3591 elif self._match_text_seq("MATCH", "FULL"): 3592 options.append("MATCH FULL") 3593 else: 3594 break 3595 3596 return options 3597 3598 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3599 if match and not self._match(TokenType.REFERENCES): 3600 return None 3601 3602 expressions = None 3603 this = self._parse_table(schema=True) 3604 options = self._parse_key_constraint_options() 3605 return self.expression(exp.Reference, this=this, 
expressions=expressions, options=options) 3606 3607 def _parse_foreign_key(self) -> exp.ForeignKey: 3608 expressions = self._parse_wrapped_id_vars() 3609 reference = self._parse_references() 3610 options = {} 3611 3612 while self._match(TokenType.ON): 3613 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3614 self.raise_error("Expected DELETE or UPDATE") 3615 3616 kind = self._prev.text.lower() 3617 3618 if self._match_text_seq("NO", "ACTION"): 3619 action = "NO ACTION" 3620 elif self._match(TokenType.SET): 3621 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3622 action = "SET " + self._prev.text.upper() 3623 else: 3624 self._advance() 3625 action = self._prev.text.upper() 3626 3627 options[kind] = action 3628 3629 return self.expression( 3630 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3631 ) 3632 3633 def _parse_primary_key( 3634 self, wrapped_optional: bool = False, in_props: bool = False 3635 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3636 desc = ( 3637 self._match_set((TokenType.ASC, TokenType.DESC)) 3638 and self._prev.token_type == TokenType.DESC 3639 ) 3640 3641 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3642 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3643 3644 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3645 options = self._parse_key_constraint_options() 3646 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3647 3648 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3649 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3650 return this 3651 3652 bracket_kind = self._prev.token_type 3653 3654 if self._match(TokenType.COLON): 3655 expressions: t.List[t.Optional[exp.Expression]] = [ 3656 self.expression(exp.Slice, expression=self._parse_conjunction()) 3657 ] 3658 else: 3659 expressions = self._parse_csv(lambda: 
self._parse_slice(self._parse_conjunction())) 3660 3661 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3662 if bracket_kind == TokenType.L_BRACE: 3663 this = self.expression(exp.Struct, expressions=expressions) 3664 elif not this or this.name.upper() == "ARRAY": 3665 this = self.expression(exp.Array, expressions=expressions) 3666 else: 3667 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3668 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3669 3670 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3671 self.raise_error("Expected ]") 3672 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3673 self.raise_error("Expected }") 3674 3675 self._add_comments(this) 3676 return self._parse_bracket(this) 3677 3678 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3679 if self._match(TokenType.COLON): 3680 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3681 return this 3682 3683 def _parse_case(self) -> t.Optional[exp.Expression]: 3684 ifs = [] 3685 default = None 3686 3687 expression = self._parse_conjunction() 3688 3689 while self._match(TokenType.WHEN): 3690 this = self._parse_conjunction() 3691 self._match(TokenType.THEN) 3692 then = self._parse_conjunction() 3693 ifs.append(self.expression(exp.If, this=this, true=then)) 3694 3695 if self._match(TokenType.ELSE): 3696 default = self._parse_conjunction() 3697 3698 if not self._match(TokenType.END): 3699 self.raise_error("Expected END after CASE", self._prev) 3700 3701 return self._parse_window( 3702 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3703 ) 3704 3705 def _parse_if(self) -> t.Optional[exp.Expression]: 3706 if self._match(TokenType.L_PAREN): 3707 args = self._parse_csv(self._parse_conjunction) 3708 this = self.validate_expression(exp.If.from_arg_list(args), args) 3709 self._match_r_paren() 
3710 else: 3711 index = self._index - 1 3712 condition = self._parse_conjunction() 3713 3714 if not condition: 3715 self._retreat(index) 3716 return None 3717 3718 self._match(TokenType.THEN) 3719 true = self._parse_conjunction() 3720 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3721 self._match(TokenType.END) 3722 this = self.expression(exp.If, this=condition, true=true, false=false) 3723 3724 return self._parse_window(this) 3725 3726 def _parse_extract(self) -> exp.Extract: 3727 this = self._parse_function() or self._parse_var() or self._parse_type() 3728 3729 if self._match(TokenType.FROM): 3730 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3731 3732 if not self._match(TokenType.COMMA): 3733 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3734 3735 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3736 3737 def _parse_any_value(self) -> exp.AnyValue: 3738 this = self._parse_lambda() 3739 is_max = None 3740 having = None 3741 3742 if self._match(TokenType.HAVING): 3743 self._match_texts(("MAX", "MIN")) 3744 is_max = self._prev.text == "MAX" 3745 having = self._parse_column() 3746 3747 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3748 3749 def _parse_cast(self, strict: bool) -> exp.Expression: 3750 this = self._parse_conjunction() 3751 3752 if not self._match(TokenType.ALIAS): 3753 if self._match(TokenType.COMMA): 3754 return self.expression( 3755 exp.CastToStrType, this=this, expression=self._parse_string() 3756 ) 3757 else: 3758 self.raise_error("Expected AS after CAST") 3759 3760 fmt = None 3761 to = self._parse_types() 3762 3763 if not to: 3764 self.raise_error("Expected TYPE after CAST") 3765 elif to.this == exp.DataType.Type.CHAR: 3766 if self._match(TokenType.CHARACTER_SET): 3767 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3768 elif self._match(TokenType.FORMAT): 3769 fmt_string = 
self._parse_string() 3770 fmt = self._parse_at_time_zone(fmt_string) 3771 3772 if to.this in exp.DataType.TEMPORAL_TYPES: 3773 this = self.expression( 3774 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3775 this=this, 3776 format=exp.Literal.string( 3777 format_time( 3778 fmt_string.this if fmt_string else "", 3779 self.FORMAT_MAPPING or self.TIME_MAPPING, 3780 self.FORMAT_TRIE or self.TIME_TRIE, 3781 ) 3782 ), 3783 ) 3784 3785 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3786 this.set("zone", fmt.args["zone"]) 3787 3788 return this 3789 3790 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3791 3792 def _parse_concat(self) -> t.Optional[exp.Expression]: 3793 args = self._parse_csv(self._parse_conjunction) 3794 if self.CONCAT_NULL_OUTPUTS_STRING: 3795 args = [ 3796 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3797 for arg in args 3798 if arg 3799 ] 3800 3801 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3802 # we find such a call we replace it with its argument. 
3803 if len(args) == 1: 3804 return args[0] 3805 3806 return self.expression( 3807 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3808 ) 3809 3810 def _parse_string_agg(self) -> exp.Expression: 3811 if self._match(TokenType.DISTINCT): 3812 args: t.List[t.Optional[exp.Expression]] = [ 3813 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3814 ] 3815 if self._match(TokenType.COMMA): 3816 args.extend(self._parse_csv(self._parse_conjunction)) 3817 else: 3818 args = self._parse_csv(self._parse_conjunction) 3819 3820 index = self._index 3821 if not self._match(TokenType.R_PAREN): 3822 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3823 return self.expression( 3824 exp.GroupConcat, 3825 this=seq_get(args, 0), 3826 separator=self._parse_order(this=seq_get(args, 1)), 3827 ) 3828 3829 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3830 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3831 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3832 if not self._match_text_seq("WITHIN", "GROUP"): 3833 self._retreat(index) 3834 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3835 3836 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3837 order = self._parse_order(this=seq_get(args, 0)) 3838 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3839 3840 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3841 this = self._parse_bitwise() 3842 3843 if self._match(TokenType.USING): 3844 to: t.Optional[exp.Expression] = self.expression( 3845 exp.CharacterSet, this=self._parse_var() 3846 ) 3847 elif self._match(TokenType.COMMA): 3848 to = self._parse_types() 3849 else: 3850 to = None 3851 3852 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3853 3854 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3855 """ 3856 There are generally two variants of the DECODE function: 3857 3858 - DECODE(bin, charset) 3859 - DECODE(expression, search, result [, search, result] ... [, default]) 3860 3861 The second variant will always be parsed into a CASE expression. Note that NULL 3862 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3863 instead of relying on pattern matching. 
3864 """ 3865 args = self._parse_csv(self._parse_conjunction) 3866 3867 if len(args) < 3: 3868 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3869 3870 expression, *expressions = args 3871 if not expression: 3872 return None 3873 3874 ifs = [] 3875 for search, result in zip(expressions[::2], expressions[1::2]): 3876 if not search or not result: 3877 return None 3878 3879 if isinstance(search, exp.Literal): 3880 ifs.append( 3881 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3882 ) 3883 elif isinstance(search, exp.Null): 3884 ifs.append( 3885 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3886 ) 3887 else: 3888 cond = exp.or_( 3889 exp.EQ(this=expression.copy(), expression=search), 3890 exp.and_( 3891 exp.Is(this=expression.copy(), expression=exp.Null()), 3892 exp.Is(this=search.copy(), expression=exp.Null()), 3893 copy=False, 3894 ), 3895 copy=False, 3896 ) 3897 ifs.append(exp.If(this=cond, true=result)) 3898 3899 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3900 3901 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3902 self._match_text_seq("KEY") 3903 key = self._parse_field() 3904 self._match(TokenType.COLON) 3905 self._match_text_seq("VALUE") 3906 value = self._parse_field() 3907 3908 if not key and not value: 3909 return None 3910 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3911 3912 def _parse_json_object(self) -> exp.JSONObject: 3913 star = self._parse_star() 3914 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3915 3916 null_handling = None 3917 if self._match_text_seq("NULL", "ON", "NULL"): 3918 null_handling = "NULL ON NULL" 3919 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3920 null_handling = "ABSENT ON NULL" 3921 3922 unique_keys = None 3923 if self._match_text_seq("WITH", "UNIQUE"): 3924 unique_keys = True 3925 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3926 unique_keys = False 3927 3928 self._match_text_seq("KEYS") 3929 3930 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3931 format_json = self._match_text_seq("FORMAT", "JSON") 3932 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3933 3934 return self.expression( 3935 exp.JSONObject, 3936 expressions=expressions, 3937 null_handling=null_handling, 3938 unique_keys=unique_keys, 3939 return_type=return_type, 3940 format_json=format_json, 3941 encoding=encoding, 3942 ) 3943 3944 def _parse_logarithm(self) -> exp.Func: 3945 # Default argument order is base, expression 3946 args = self._parse_csv(self._parse_range) 3947 3948 if len(args) > 1: 3949 if not self.LOG_BASE_FIRST: 3950 args.reverse() 3951 return exp.Log.from_arg_list(args) 3952 3953 return self.expression( 3954 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3955 ) 3956 3957 def _parse_match_against(self) -> exp.MatchAgainst: 3958 expressions = self._parse_csv(self._parse_column) 3959 3960 self._match_text_seq(")", "AGAINST", "(") 3961 3962 this = self._parse_string() 3963 3964 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3965 modifier = "IN NATURAL LANGUAGE MODE" 3966 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3967 modifier = f"{modifier} WITH QUERY EXPANSION" 3968 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3969 modifier = "IN BOOLEAN MODE" 3970 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3971 modifier = "WITH QUERY EXPANSION" 3972 else: 3973 modifier = None 3974 3975 return self.expression( 3976 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3977 ) 3978 3979 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3980 def _parse_open_json(self) -> exp.OpenJSON: 3981 this = self._parse_bitwise() 3982 path = self._match(TokenType.COMMA) and self._parse_string() 3983 3984 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3985 this = self._parse_field(any_token=True) 3986 kind = self._parse_types() 3987 path = self._parse_string() 3988 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3989 3990 return self.expression( 3991 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3992 ) 3993 3994 expressions = None 3995 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3996 self._match_l_paren() 3997 expressions = self._parse_csv(_parse_open_json_column_def) 3998 3999 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4000 4001 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4002 args = self._parse_csv(self._parse_bitwise) 4003 4004 if self._match(TokenType.IN): 4005 return self.expression( 4006 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4007 ) 4008 4009 if haystack_first: 4010 haystack = seq_get(args, 0) 4011 needle = seq_get(args, 1) 4012 else: 4013 needle = seq_get(args, 0) 4014 haystack = seq_get(args, 1) 4015 4016 return self.expression( 4017 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4018 ) 4019 4020 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4021 args = self._parse_csv(self._parse_table) 4022 return exp.JoinHint(this=func_name.upper(), expressions=args) 4023 4024 def _parse_substring(self) -> exp.Substring: 4025 # Postgres supports the form: substring(string [from int] [for int]) 4026 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4027 4028 args = self._parse_csv(self._parse_bitwise) 4029 4030 if self._match(TokenType.FROM): 4031 args.append(self._parse_bitwise()) 4032 if self._match(TokenType.FOR): 4033 args.append(self._parse_bitwise()) 4034 4035 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4036 4037 def _parse_trim(self) -> exp.Trim: 4038 # https://www.w3resource.com/sql/character-functions/trim.php 4039 
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first expression was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's comma-separated named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls if either modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse the trailing window machinery of a function call: FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and an OVER (...) specification.

        Args:
            this: the function expression the window attaches to.
            alias: True when parsing a named window definition (name AS (spec)),
                in which case no OVER keyword is expected.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse an optional alias for `this`; when `explicit` is True, only an alias
        introduced by AS is accepted.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: expr AS (a, b, c)
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or an unreserved keyword usable as one."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or a placeholder standing in for one."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or a placeholder standing in for one."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, or a placeholder standing in for one."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any allowed token) into an exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR token or, failing that, a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference such as @name or @{name}."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a bind placeholder (?, :name, ...); backtracks if the parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (col, ...) column-exclusion list (wrapped or bare)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr AS alias, ...) column-replacement list (wrapped or bare)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a separator-delimited list with `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` (token -> node type) over operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, separator-delimited list with `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; the parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse a BEGIN/START TRANSACTION statement with optional modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode may span multiple VAR tokens (e.g. "READ ONLY").
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse a COMMIT or ROLLBACK statement (the keyword was already consumed)."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE ... DROP [COLUMN] action, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse an ALTER TABLE ... DROP PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... ADD: constraints if possible, otherwise columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint — rewind and parse as column additions.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP: partitions if present, otherwise columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop — rewind and parse as column drops.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse an ALTER TABLE ... RENAME TO action."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement, falling back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE INTO ... USING ... ON ... WHEN ... statement."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False if neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via dialect parsers, defaulting to a generic Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item of the form `name = value` or `name TO value`."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment — rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION with its characteristics."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, dispatching to dialect parsers when available."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement, falling back to a raw Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the given (possibly multi-word) options and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split leading keyword (length of the start token) from the rest.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property clause (e.g. ClickHouse SOURCE/LAYOUT)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE(MIN ... MAX ...) clause; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # NOTE: `min`/`max` intentionally mirror the arg names of exp.DictRange,
            # shadowing the builtins within this method only.
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a parser keyed by (possibly multi-token) text, using a prefix trie."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No match — restore the cursor.
        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (consuming the token if `advance`) when the current token matches."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (consuming the token if `advance`) when the current token is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (consuming both tokens if `advance`) when the next two tokens match."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(` token, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)` token, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True when the current token's uppercased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Match a sequence of token texts; rewinds fully if any piece fails to match."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
4787 4788 def _replace_columns_with_dots(self, this): 4789 if isinstance(this, exp.Dot): 4790 exp.replace_children(this, self._replace_columns_with_dots) 4791 elif isinstance(this, exp.Column): 4792 exp.replace_children(this, self._replace_columns_with_dots) 4793 table = this.args.get("table") 4794 this = ( 4795 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4796 ) 4797 4798 return this 4799 4800 def _replace_lambda( 4801 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4802 ) -> t.Optional[exp.Expression]: 4803 if not node: 4804 return node 4805 4806 for column in node.find_all(exp.Column): 4807 if column.parts[0].name in lambda_variables: 4808 dot_or_id = column.to_dot() if column.table else column.this 4809 parent = column.parent 4810 4811 while isinstance(parent, exp.Dot): 4812 if not isinstance(parent.parent, exp.Dot): 4813 parent.replace(dot_or_id) 4814 break 4815 parent = parent.parent 4816 else: 4817 if column is node: 4818 node = dot_or_id 4819 else: 4820 column.replace(dot_or_id) 4821 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset()
864 def parse( 865 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 )
Logs or raises any found errors, depending on the chosen error level setting.
964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.