# sqlglot.parser — consumes Tokenizer output and produces expression trees
1from __future__ import annotations 2 3import logging 4import typing as t 5 6from sqlglot import exp 7from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 8from sqlglot.helper import ( 9 apply_index_offset, 10 count_params, 11 ensure_collection, 12 ensure_list, 13 seq_get, 14) 15from sqlglot.tokens import Token, Tokenizer, TokenType 16from sqlglot.trie import in_trie, new_trie 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args): 22 keys = [] 23 values = [] 24 for i in range(0, len(args), 2): 25 keys.append(args[i]) 26 values.append(args[i + 1]) 27 return exp.VarMap( 28 keys=exp.Array(expressions=keys), 29 values=exp.Array(expressions=values), 30 ) 31 32 33class _Parser(type): 34 def __new__(cls, clsname, bases, attrs): 35 klass = super().__new__(cls, clsname, bases, attrs) 36 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 37 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 38 return klass 39 40 41class Parser(metaclass=_Parser): 42 """ 43 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 44 a parsed syntax tree. 45 46 Args: 47 error_level: the desired error level. 48 Default: ErrorLevel.RAISE 49 error_message_context: determines the amount of context to capture from a 50 query string when displaying the error message (in number of characters). 51 Default: 50. 52 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 53 Default: 0 54 alias_post_tablesample: If the table alias comes after tablesample. 55 Default: False 56 max_errors: Maximum number of error messages to include in a raised ParseError. 57 This is only relevant if error_level is ErrorLevel.RAISE. 58 Default: 3 59 null_ordering: Indicates the default null ordering method to use if not explicitly set. 60 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 
61 Default: "nulls_are_small" 62 """ 63 64 FUNCTIONS: t.Dict[str, t.Callable] = { 65 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 66 "DATE_TO_DATE_STR": lambda args: exp.Cast( 67 this=seq_get(args, 0), 68 to=exp.DataType(this=exp.DataType.Type.TEXT), 69 ), 70 "TIME_TO_TIME_STR": lambda args: exp.Cast( 71 this=seq_get(args, 0), 72 to=exp.DataType(this=exp.DataType.Type.TEXT), 73 ), 74 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 75 this=exp.Cast( 76 this=seq_get(args, 0), 77 to=exp.DataType(this=exp.DataType.Type.TEXT), 78 ), 79 start=exp.Literal.number(1), 80 length=exp.Literal.number(10), 81 ), 82 "VAR_MAP": parse_var_map, 83 "IFNULL": exp.Coalesce.from_arg_list, 84 } 85 86 NO_PAREN_FUNCTIONS = { 87 TokenType.CURRENT_DATE: exp.CurrentDate, 88 TokenType.CURRENT_DATETIME: exp.CurrentDate, 89 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 90 } 91 92 NESTED_TYPE_TOKENS = { 93 TokenType.ARRAY, 94 TokenType.MAP, 95 TokenType.STRUCT, 96 TokenType.NULLABLE, 97 } 98 99 TYPE_TOKENS = { 100 TokenType.BOOLEAN, 101 TokenType.TINYINT, 102 TokenType.SMALLINT, 103 TokenType.INT, 104 TokenType.BIGINT, 105 TokenType.FLOAT, 106 TokenType.DOUBLE, 107 TokenType.CHAR, 108 TokenType.NCHAR, 109 TokenType.VARCHAR, 110 TokenType.NVARCHAR, 111 TokenType.TEXT, 112 TokenType.MEDIUMTEXT, 113 TokenType.LONGTEXT, 114 TokenType.MEDIUMBLOB, 115 TokenType.LONGBLOB, 116 TokenType.BINARY, 117 TokenType.VARBINARY, 118 TokenType.JSON, 119 TokenType.JSONB, 120 TokenType.INTERVAL, 121 TokenType.TIME, 122 TokenType.TIMESTAMP, 123 TokenType.TIMESTAMPTZ, 124 TokenType.TIMESTAMPLTZ, 125 TokenType.DATETIME, 126 TokenType.DATE, 127 TokenType.DECIMAL, 128 TokenType.UUID, 129 TokenType.GEOGRAPHY, 130 TokenType.GEOMETRY, 131 TokenType.HLLSKETCH, 132 TokenType.HSTORE, 133 TokenType.PSEUDO_TYPE, 134 TokenType.SUPER, 135 TokenType.SERIAL, 136 TokenType.SMALLSERIAL, 137 TokenType.BIGSERIAL, 138 TokenType.XML, 139 TokenType.UNIQUEIDENTIFIER, 140 TokenType.MONEY, 141 
TokenType.SMALLMONEY, 142 TokenType.ROWVERSION, 143 TokenType.IMAGE, 144 TokenType.VARIANT, 145 TokenType.OBJECT, 146 *NESTED_TYPE_TOKENS, 147 } 148 149 SUBQUERY_PREDICATES = { 150 TokenType.ANY: exp.Any, 151 TokenType.ALL: exp.All, 152 TokenType.EXISTS: exp.Exists, 153 TokenType.SOME: exp.Any, 154 } 155 156 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 157 158 ID_VAR_TOKENS = { 159 TokenType.VAR, 160 TokenType.ALWAYS, 161 TokenType.ANTI, 162 TokenType.APPLY, 163 TokenType.AUTO_INCREMENT, 164 TokenType.BEGIN, 165 TokenType.BOTH, 166 TokenType.BUCKET, 167 TokenType.CACHE, 168 TokenType.CASCADE, 169 TokenType.COLLATE, 170 TokenType.COLUMN, 171 TokenType.COMMAND, 172 TokenType.COMMIT, 173 TokenType.COMPOUND, 174 TokenType.CONSTRAINT, 175 TokenType.CURRENT_TIME, 176 TokenType.DEFAULT, 177 TokenType.DELETE, 178 TokenType.DESCRIBE, 179 TokenType.DIV, 180 TokenType.END, 181 TokenType.EXECUTE, 182 TokenType.ESCAPE, 183 TokenType.FALSE, 184 TokenType.FIRST, 185 TokenType.FILTER, 186 TokenType.FOLLOWING, 187 TokenType.FORMAT, 188 TokenType.FUNCTION, 189 TokenType.GENERATED, 190 TokenType.IDENTITY, 191 TokenType.IF, 192 TokenType.INDEX, 193 TokenType.ISNULL, 194 TokenType.INTERVAL, 195 TokenType.LAZY, 196 TokenType.LEADING, 197 TokenType.LEFT, 198 TokenType.LOCAL, 199 TokenType.MATERIALIZED, 200 TokenType.MERGE, 201 TokenType.NATURAL, 202 TokenType.NEXT, 203 TokenType.OFFSET, 204 TokenType.ONLY, 205 TokenType.OPTIONS, 206 TokenType.ORDINALITY, 207 TokenType.PERCENT, 208 TokenType.PIVOT, 209 TokenType.PRECEDING, 210 TokenType.RANGE, 211 TokenType.REFERENCES, 212 TokenType.RIGHT, 213 TokenType.ROW, 214 TokenType.ROWS, 215 TokenType.SCHEMA, 216 TokenType.SCHEMA_COMMENT, 217 TokenType.SEED, 218 TokenType.SEMI, 219 TokenType.SET, 220 TokenType.SHOW, 221 TokenType.SORTKEY, 222 TokenType.TABLE, 223 TokenType.TEMPORARY, 224 TokenType.TOP, 225 TokenType.TRAILING, 226 TokenType.TRUE, 227 TokenType.UNBOUNDED, 228 TokenType.UNIQUE, 229 TokenType.UNLOGGED, 230 
TokenType.UNPIVOT, 231 TokenType.PROCEDURE, 232 TokenType.VIEW, 233 TokenType.VOLATILE, 234 TokenType.WINDOW, 235 *SUBQUERY_PREDICATES, 236 *TYPE_TOKENS, 237 *NO_PAREN_FUNCTIONS, 238 } 239 240 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 241 TokenType.APPLY, 242 TokenType.LEFT, 243 TokenType.NATURAL, 244 TokenType.OFFSET, 245 TokenType.RIGHT, 246 TokenType.WINDOW, 247 } 248 249 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 250 251 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 252 253 FUNC_TOKENS = { 254 TokenType.COMMAND, 255 TokenType.CURRENT_DATE, 256 TokenType.CURRENT_DATETIME, 257 TokenType.CURRENT_TIMESTAMP, 258 TokenType.CURRENT_TIME, 259 TokenType.FILTER, 260 TokenType.FIRST, 261 TokenType.FORMAT, 262 TokenType.IDENTIFIER, 263 TokenType.INDEX, 264 TokenType.ISNULL, 265 TokenType.ILIKE, 266 TokenType.LIKE, 267 TokenType.MERGE, 268 TokenType.OFFSET, 269 TokenType.PRIMARY_KEY, 270 TokenType.REPLACE, 271 TokenType.ROW, 272 TokenType.UNNEST, 273 TokenType.VAR, 274 TokenType.LEFT, 275 TokenType.RIGHT, 276 TokenType.DATE, 277 TokenType.DATETIME, 278 TokenType.TABLE, 279 TokenType.TIMESTAMP, 280 TokenType.TIMESTAMPTZ, 281 TokenType.WINDOW, 282 *TYPE_TOKENS, 283 *SUBQUERY_PREDICATES, 284 } 285 286 CONJUNCTION = { 287 TokenType.AND: exp.And, 288 TokenType.OR: exp.Or, 289 } 290 291 EQUALITY = { 292 TokenType.EQ: exp.EQ, 293 TokenType.NEQ: exp.NEQ, 294 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 295 } 296 297 COMPARISON = { 298 TokenType.GT: exp.GT, 299 TokenType.GTE: exp.GTE, 300 TokenType.LT: exp.LT, 301 TokenType.LTE: exp.LTE, 302 } 303 304 BITWISE = { 305 TokenType.AMP: exp.BitwiseAnd, 306 TokenType.CARET: exp.BitwiseXor, 307 TokenType.PIPE: exp.BitwiseOr, 308 TokenType.DPIPE: exp.DPipe, 309 } 310 311 TERM = { 312 TokenType.DASH: exp.Sub, 313 TokenType.PLUS: exp.Add, 314 TokenType.MOD: exp.Mod, 315 TokenType.COLLATE: exp.Collate, 316 } 317 318 FACTOR = { 319 TokenType.DIV: exp.IntDiv, 320 TokenType.LR_ARROW: exp.Distance, 321 
TokenType.SLASH: exp.Div, 322 TokenType.STAR: exp.Mul, 323 } 324 325 TIMESTAMPS = { 326 TokenType.TIME, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMPTZ, 329 TokenType.TIMESTAMPLTZ, 330 } 331 332 SET_OPERATIONS = { 333 TokenType.UNION, 334 TokenType.INTERSECT, 335 TokenType.EXCEPT, 336 } 337 338 JOIN_SIDES = { 339 TokenType.LEFT, 340 TokenType.RIGHT, 341 TokenType.FULL, 342 } 343 344 JOIN_KINDS = { 345 TokenType.INNER, 346 TokenType.OUTER, 347 TokenType.CROSS, 348 TokenType.SEMI, 349 TokenType.ANTI, 350 } 351 352 LAMBDAS = { 353 TokenType.ARROW: lambda self, expressions: self.expression( 354 exp.Lambda, 355 this=self._parse_conjunction().transform( 356 self._replace_lambda, {node.name for node in expressions} 357 ), 358 expressions=expressions, 359 ), 360 TokenType.FARROW: lambda self, expressions: self.expression( 361 exp.Kwarg, 362 this=exp.Var(this=expressions[0].name), 363 expression=self._parse_conjunction(), 364 ), 365 } 366 367 COLUMN_OPERATORS = { 368 TokenType.DOT: None, 369 TokenType.DCOLON: lambda self, this, to: self.expression( 370 exp.Cast, 371 this=this, 372 to=to, 373 ), 374 TokenType.ARROW: lambda self, this, path: self.expression( 375 exp.JSONExtract, 376 this=this, 377 expression=path, 378 ), 379 TokenType.DARROW: lambda self, this, path: self.expression( 380 exp.JSONExtractScalar, 381 this=this, 382 expression=path, 383 ), 384 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 385 exp.JSONBExtract, 386 this=this, 387 expression=path, 388 ), 389 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 390 exp.JSONBExtractScalar, 391 this=this, 392 expression=path, 393 ), 394 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 395 exp.JSONBContains, 396 this=this, 397 expression=key, 398 ), 399 } 400 401 EXPRESSION_PARSERS = { 402 exp.Column: lambda self: self._parse_column(), 403 exp.DataType: lambda self: self._parse_types(), 404 exp.From: lambda self: self._parse_from(), 405 exp.Group: lambda self: 
self._parse_group(), 406 exp.Identifier: lambda self: self._parse_id_var(), 407 exp.Lateral: lambda self: self._parse_lateral(), 408 exp.Join: lambda self: self._parse_join(), 409 exp.Order: lambda self: self._parse_order(), 410 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 411 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 412 exp.Lambda: lambda self: self._parse_lambda(), 413 exp.Limit: lambda self: self._parse_limit(), 414 exp.Offset: lambda self: self._parse_offset(), 415 exp.TableAlias: lambda self: self._parse_table_alias(), 416 exp.Table: lambda self: self._parse_table(), 417 exp.Condition: lambda self: self._parse_conjunction(), 418 exp.Expression: lambda self: self._parse_statement(), 419 exp.Properties: lambda self: self._parse_properties(), 420 exp.Where: lambda self: self._parse_where(), 421 exp.Ordered: lambda self: self._parse_ordered(), 422 exp.Having: lambda self: self._parse_having(), 423 exp.With: lambda self: self._parse_with(), 424 exp.Window: lambda self: self._parse_named_window(), 425 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 426 } 427 428 STATEMENT_PARSERS = { 429 TokenType.ALTER: lambda self: self._parse_alter(), 430 TokenType.BEGIN: lambda self: self._parse_transaction(), 431 TokenType.CACHE: lambda self: self._parse_cache(), 432 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 433 TokenType.CREATE: lambda self: self._parse_create(), 434 TokenType.DELETE: lambda self: self._parse_delete(), 435 TokenType.DESC: lambda self: self._parse_describe(), 436 TokenType.DESCRIBE: lambda self: self._parse_describe(), 437 TokenType.DROP: lambda self: self._parse_drop(), 438 TokenType.END: lambda self: self._parse_commit_or_rollback(), 439 TokenType.INSERT: lambda self: self._parse_insert(), 440 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 441 TokenType.MERGE: lambda self: self._parse_merge(), 442 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 443 TokenType.UNCACHE: lambda self: self._parse_uncache(), 444 TokenType.UPDATE: lambda self: self._parse_update(), 445 TokenType.USE: lambda self: self.expression( 446 exp.Use, 447 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 448 and exp.Var(this=self._prev.text), 449 this=self._parse_table(schema=False), 450 ), 451 } 452 453 UNARY_PARSERS = { 454 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 455 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 456 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 457 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 458 } 459 460 PRIMARY_PARSERS = { 461 TokenType.STRING: lambda self, token: self.expression( 462 exp.Literal, this=token.text, is_string=True 463 ), 464 TokenType.NUMBER: lambda self, token: self.expression( 465 exp.Literal, this=token.text, is_string=False 466 ), 467 TokenType.STAR: lambda self, _: self.expression( 468 exp.Star, 469 **{"except": self._parse_except(), "replace": self._parse_replace()}, 470 ), 471 TokenType.NULL: lambda self, _: self.expression(exp.Null), 472 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 473 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 474 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 475 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 476 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 477 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 478 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 479 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 480 } 481 482 PLACEHOLDER_PARSERS = { 483 TokenType.PLACEHOLDER: lambda self: 
self.expression(exp.Placeholder), 484 TokenType.PARAMETER: lambda self: self.expression( 485 exp.Parameter, this=self._parse_var() or self._parse_primary() 486 ), 487 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 488 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 489 else None, 490 } 491 492 RANGE_PARSERS = { 493 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 494 TokenType.GLOB: lambda self, this: self._parse_escape( 495 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 496 ), 497 TokenType.IN: lambda self, this: self._parse_in(this), 498 TokenType.IS: lambda self, this: self._parse_is(this), 499 TokenType.LIKE: lambda self, this: self._parse_escape( 500 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) 501 ), 502 TokenType.ILIKE: lambda self, this: self._parse_escape( 503 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) 504 ), 505 TokenType.IRLIKE: lambda self, this: self.expression( 506 exp.RegexpILike, this=this, expression=self._parse_bitwise() 507 ), 508 TokenType.RLIKE: lambda self, this: self.expression( 509 exp.RegexpLike, this=this, expression=self._parse_bitwise() 510 ), 511 TokenType.SIMILAR_TO: lambda self, this: self.expression( 512 exp.SimilarTo, this=this, expression=self._parse_bitwise() 513 ), 514 } 515 516 PROPERTY_PARSERS = { 517 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 518 "CHARACTER SET": lambda self: self._parse_character_set(), 519 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 520 "PARTITION BY": lambda self: self._parse_partitioned_by(), 521 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 522 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 523 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 524 "STORED": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 525 "DISTKEY": lambda self: self._parse_distkey(), 526 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 527 "SORTKEY": lambda self: self._parse_sortkey(), 528 "LIKE": lambda self: self._parse_create_like(), 529 "RETURNS": lambda self: self._parse_returns(), 530 "ROW": lambda self: self._parse_row(), 531 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 532 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 533 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 534 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 535 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 536 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 537 "DETERMINISTIC": lambda self: self.expression( 538 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 539 ), 540 "IMMUTABLE": lambda self: self.expression( 541 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 542 ), 543 "STABLE": lambda self: self.expression( 544 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 545 ), 546 "VOLATILE": lambda self: self.expression( 547 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 548 ), 549 "WITH": lambda self: self._parse_with_property(), 550 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 551 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 552 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 553 "BEFORE": lambda self: self._parse_journal( 554 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 555 ), 556 "JOURNAL": lambda self: self._parse_journal( 557 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 558 ), 559 "AFTER": lambda self: self._parse_afterjournal( 560 
no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 561 ), 562 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 563 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 564 "CHECKSUM": lambda self: self._parse_checksum(), 565 "FREESPACE": lambda self: self._parse_freespace(), 566 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 567 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 568 ), 569 "MIN": lambda self: self._parse_datablocksize(), 570 "MINIMUM": lambda self: self._parse_datablocksize(), 571 "MAX": lambda self: self._parse_datablocksize(), 572 "MAXIMUM": lambda self: self._parse_datablocksize(), 573 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 574 default=self._prev.text.upper() == "DEFAULT" 575 ), 576 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 577 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 578 "DEFINER": lambda self: self._parse_definer(), 579 } 580 581 CONSTRAINT_PARSERS = { 582 TokenType.CHECK: lambda self: self.expression( 583 exp.Check, this=self._parse_wrapped(self._parse_conjunction) 584 ), 585 TokenType.FOREIGN_KEY: lambda self: self._parse_foreign_key(), 586 TokenType.UNIQUE: lambda self: self._parse_unique(), 587 TokenType.LIKE: lambda self: self._parse_create_like(), 588 } 589 590 NO_PAREN_FUNCTION_PARSERS = { 591 TokenType.CASE: lambda self: self._parse_case(), 592 TokenType.IF: lambda self: self._parse_if(), 593 } 594 595 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 596 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 597 "TRY_CONVERT": lambda self: self._parse_convert(False), 598 "EXTRACT": lambda self: self._parse_extract(), 599 "POSITION": lambda self: self._parse_position(), 600 "SUBSTRING": lambda self: self._parse_substring(), 601 "TRIM": lambda self: self._parse_trim(), 602 "CAST": lambda self: 
self._parse_cast(self.STRICT_CAST), 603 "TRY_CAST": lambda self: self._parse_cast(False), 604 "STRING_AGG": lambda self: self._parse_string_agg(), 605 } 606 607 QUERY_MODIFIER_PARSERS = { 608 "match": lambda self: self._parse_match_recognize(), 609 "where": lambda self: self._parse_where(), 610 "group": lambda self: self._parse_group(), 611 "having": lambda self: self._parse_having(), 612 "qualify": lambda self: self._parse_qualify(), 613 "windows": lambda self: self._parse_window_clause(), 614 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 615 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 616 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 617 "order": lambda self: self._parse_order(), 618 "limit": lambda self: self._parse_limit(), 619 "offset": lambda self: self._parse_offset(), 620 "lock": lambda self: self._parse_lock(), 621 } 622 623 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 624 SET_PARSERS: t.Dict[str, t.Callable] = {} 625 626 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 627 628 CREATABLES = { 629 TokenType.COLUMN, 630 TokenType.FUNCTION, 631 TokenType.INDEX, 632 TokenType.PROCEDURE, 633 TokenType.SCHEMA, 634 TokenType.TABLE, 635 TokenType.VIEW, 636 } 637 638 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 639 640 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 641 642 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 643 644 STRICT_CAST = True 645 646 __slots__ = ( 647 "error_level", 648 "error_message_context", 649 "sql", 650 "errors", 651 "index_offset", 652 "unnest_column_only", 653 "alias_post_tablesample", 654 "max_errors", 655 "null_ordering", 656 "_tokens", 657 "_index", 658 "_curr", 659 "_next", 660 "_prev", 661 "_prev_comments", 662 "_show_trie", 663 "_set_trie", 664 ) 665 666 def __init__( 667 self, 668 error_level: t.Optional[ErrorLevel] = None, 669 error_message_context: int = 100, 670 
index_offset: int = 0, 671 unnest_column_only: bool = False, 672 alias_post_tablesample: bool = False, 673 max_errors: int = 3, 674 null_ordering: t.Optional[str] = None, 675 ): 676 self.error_level = error_level or ErrorLevel.IMMEDIATE 677 self.error_message_context = error_message_context 678 self.index_offset = index_offset 679 self.unnest_column_only = unnest_column_only 680 self.alias_post_tablesample = alias_post_tablesample 681 self.max_errors = max_errors 682 self.null_ordering = null_ordering 683 self.reset() 684 685 def reset(self): 686 self.sql = "" 687 self.errors = [] 688 self._tokens = [] 689 self._index = 0 690 self._curr = None 691 self._next = None 692 self._prev = None 693 self._prev_comments = None 694 695 def parse( 696 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 697 ) -> t.List[t.Optional[exp.Expression]]: 698 """ 699 Parses a list of tokens and returns a list of syntax trees, one tree 700 per parsed SQL statement. 701 702 Args: 703 raw_tokens: the list of tokens. 704 sql: the original SQL string, used to produce helpful debug messages. 705 706 Returns: 707 The list of syntax trees. 708 """ 709 return self._parse( 710 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 711 ) 712 713 def parse_into( 714 self, 715 expression_types: exp.IntoType, 716 raw_tokens: t.List[Token], 717 sql: t.Optional[str] = None, 718 ) -> t.List[t.Optional[exp.Expression]]: 719 """ 720 Parses a list of tokens into a given Expression type. If a collection of Expression 721 types is given instead, this method will try to parse the token list into each one 722 of them, stopping at the first for which the parsing succeeds. 723 724 Args: 725 expression_types: the expression type(s) to try and parse the token list into. 726 raw_tokens: the list of tokens. 727 sql: the original SQL string, used to produce helpful debug messages. 728 729 Returns: 730 The target Expression. 
731 """ 732 errors = [] 733 for expression_type in ensure_collection(expression_types): 734 parser = self.EXPRESSION_PARSERS.get(expression_type) 735 if not parser: 736 raise TypeError(f"No parser registered for {expression_type}") 737 try: 738 return self._parse(parser, raw_tokens, sql) 739 except ParseError as e: 740 e.errors[0]["into_expression"] = expression_type 741 errors.append(e) 742 raise ParseError( 743 f"Failed to parse into {expression_types}", 744 errors=merge_errors(errors), 745 ) from errors[-1] 746 747 def _parse( 748 self, 749 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 750 raw_tokens: t.List[Token], 751 sql: t.Optional[str] = None, 752 ) -> t.List[t.Optional[exp.Expression]]: 753 self.reset() 754 self.sql = sql or "" 755 total = len(raw_tokens) 756 chunks: t.List[t.List[Token]] = [[]] 757 758 for i, token in enumerate(raw_tokens): 759 if token.token_type == TokenType.SEMICOLON: 760 if i < total - 1: 761 chunks.append([]) 762 else: 763 chunks[-1].append(token) 764 765 expressions = [] 766 767 for tokens in chunks: 768 self._index = -1 769 self._tokens = tokens 770 self._advance() 771 772 expressions.append(parse_method(self)) 773 774 if self._index < len(self._tokens): 775 self.raise_error("Invalid expression / Unexpected token") 776 777 self.check_errors() 778 779 return expressions 780 781 def check_errors(self) -> None: 782 """ 783 Logs or raises any found errors, depending on the chosen error level setting. 784 """ 785 if self.error_level == ErrorLevel.WARN: 786 for error in self.errors: 787 logger.error(str(error)) 788 elif self.error_level == ErrorLevel.RAISE and self.errors: 789 raise ParseError( 790 concat_messages(self.errors, self.max_errors), 791 errors=merge_errors(self.errors), 792 ) 793 794 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 795 """ 796 Appends an error in the list of recorded errors or raises it, depending on the chosen 797 error level setting. 
798 """ 799 token = token or self._curr or self._prev or Token.string("") 800 start = self._find_token(token) 801 end = start + len(token.text) 802 start_context = self.sql[max(start - self.error_message_context, 0) : start] 803 highlight = self.sql[start:end] 804 end_context = self.sql[end : end + self.error_message_context] 805 806 error = ParseError.new( 807 f"{message}. Line {token.line}, Col: {token.col}.\n" 808 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 809 description=message, 810 line=token.line, 811 col=token.col, 812 start_context=start_context, 813 highlight=highlight, 814 end_context=end_context, 815 ) 816 817 if self.error_level == ErrorLevel.IMMEDIATE: 818 raise error 819 820 self.errors.append(error) 821 822 def expression( 823 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 824 ) -> exp.Expression: 825 """ 826 Creates a new, validated Expression. 827 828 Args: 829 exp_class: the expression class to instantiate. 830 comments: an optional list of comments to attach to the expression. 831 kwargs: the arguments to set for the expression along with their respective values. 832 833 Returns: 834 The target expression. 835 """ 836 instance = exp_class(**kwargs) 837 if self._prev_comments: 838 instance.comments = self._prev_comments 839 self._prev_comments = None 840 if comments: 841 instance.comments = comments 842 self.validate_expression(instance) 843 return instance 844 845 def validate_expression( 846 self, expression: exp.Expression, args: t.Optional[t.List] = None 847 ) -> None: 848 """ 849 Validates an already instantiated expression, making sure that all its mandatory arguments 850 are set. 851 852 Args: 853 expression: the expression to validate. 854 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
855 """ 856 if self.error_level == ErrorLevel.IGNORE: 857 return 858 859 for error_message in expression.error_messages(args): 860 self.raise_error(error_message) 861 862 def _find_sql(self, start: Token, end: Token) -> str: 863 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 864 865 def _find_token(self, token: Token) -> int: 866 line = 1 867 col = 1 868 index = 0 869 870 while line < token.line or col < token.col: 871 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 872 line += 1 873 col = 1 874 else: 875 col += 1 876 index += 1 877 878 return index 879 880 def _advance(self, times: int = 1) -> None: 881 self._index += times 882 self._curr = seq_get(self._tokens, self._index) 883 self._next = seq_get(self._tokens, self._index + 1) 884 if self._index > 0: 885 self._prev = self._tokens[self._index - 1] 886 self._prev_comments = self._prev.comments 887 else: 888 self._prev = None 889 self._prev_comments = None 890 891 def _retreat(self, index: int) -> None: 892 self._advance(index - self._index) 893 894 def _parse_command(self) -> exp.Expression: 895 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 896 897 def _parse_statement(self) -> t.Optional[exp.Expression]: 898 if self._curr is None: 899 return None 900 901 if self._match_set(self.STATEMENT_PARSERS): 902 return self.STATEMENT_PARSERS[self._prev.token_type](self) 903 904 if self._match_set(Tokenizer.COMMANDS): 905 return self._parse_command() 906 907 expression = self._parse_expression() 908 expression = self._parse_set_operations(expression) if expression else self._parse_select() 909 910 self._parse_query_modifiers(expression) 911 return expression 912 913 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 914 start = self._prev 915 temporary = self._match(TokenType.TEMPORARY) 916 materialized = self._match(TokenType.MATERIALIZED) 917 kind = self._match_set(self.CREATABLES) and 
self._prev.text 918 if not kind: 919 if default_kind: 920 kind = default_kind 921 else: 922 return self._parse_as_command(start) 923 924 return self.expression( 925 exp.Drop, 926 exists=self._parse_exists(), 927 this=self._parse_table(schema=True), 928 kind=kind, 929 temporary=temporary, 930 materialized=materialized, 931 cascade=self._match(TokenType.CASCADE), 932 ) 933 934 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 935 return ( 936 self._match(TokenType.IF) 937 and (not not_ or self._match(TokenType.NOT)) 938 and self._match(TokenType.EXISTS) 939 ) 940 941 def _parse_create(self) -> t.Optional[exp.Expression]: 942 start = self._prev 943 replace = self._match_pair(TokenType.OR, TokenType.REPLACE) 944 set_ = self._match(TokenType.SET) # Teradata 945 multiset = self._match_text_seq("MULTISET") # Teradata 946 global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY") # Teradata 947 volatile = self._match(TokenType.VOLATILE) # Teradata 948 temporary = self._match(TokenType.TEMPORARY) 949 transient = self._match_text_seq("TRANSIENT") 950 external = self._match_text_seq("EXTERNAL") 951 unique = self._match(TokenType.UNIQUE) 952 materialized = self._match(TokenType.MATERIALIZED) 953 954 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 955 self._match(TokenType.TABLE) 956 957 properties = None 958 create_token = self._match_set(self.CREATABLES) and self._prev 959 960 if not create_token: 961 properties = self._parse_properties() 962 create_token = self._match_set(self.CREATABLES) and self._prev 963 964 if not properties or not create_token: 965 return self._parse_as_command(start) 966 967 exists = self._parse_exists(not_=True) 968 this = None 969 expression = None 970 data = None 971 statistics = None 972 no_primary_index = None 973 indexes = None 974 no_schema_binding = None 975 begin = None 976 977 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 978 this = 
self._parse_user_defined_function(kind=create_token.token_type) 979 properties = self._parse_properties() 980 981 self._match(TokenType.ALIAS) 982 begin = self._match(TokenType.BEGIN) 983 return_ = self._match_text_seq("RETURN") 984 expression = self._parse_statement() 985 986 if return_: 987 expression = self.expression(exp.Return, this=expression) 988 elif create_token.token_type == TokenType.INDEX: 989 this = self._parse_index() 990 elif create_token.token_type in ( 991 TokenType.TABLE, 992 TokenType.VIEW, 993 TokenType.SCHEMA, 994 ): 995 table_parts = self._parse_table_parts(schema=True) 996 997 if self._match(TokenType.COMMA): # comma-separated properties before schema definition 998 properties = self._parse_properties(before=True) 999 1000 this = self._parse_schema(this=table_parts) 1001 1002 if not properties: # properties after schema definition 1003 properties = self._parse_properties() 1004 1005 self._match(TokenType.ALIAS) 1006 expression = self._parse_ddl_select() 1007 1008 if create_token.token_type == TokenType.TABLE: 1009 if self._match_text_seq("WITH", "DATA"): 1010 data = True 1011 elif self._match_text_seq("WITH", "NO", "DATA"): 1012 data = False 1013 1014 if self._match_text_seq("AND", "STATISTICS"): 1015 statistics = True 1016 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1017 statistics = False 1018 1019 no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX") 1020 1021 indexes = [] 1022 while True: 1023 index = self._parse_create_table_index() 1024 1025 # post index PARTITION BY property 1026 if self._match(TokenType.PARTITION_BY, advance=False): 1027 if properties: 1028 properties.expressions.append(self._parse_property()) 1029 else: 1030 properties = self._parse_properties() 1031 1032 if not index: 1033 break 1034 else: 1035 indexes.append(index) 1036 elif create_token.token_type == TokenType.VIEW: 1037 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1038 no_schema_binding = True 1039 1040 return self.expression( 
            exp.Create,
            this=this,
            kind=create_token.text,
            expression=expression,
            set=set_,
            multiset=multiset,
            global_temporary=global_temporary,
            volatile=volatile,
            exists=exists,
            properties=properties,
            temporary=temporary,
            transient=transient,
            external=external,
            replace=replace,
            unique=unique,
            materialized=materialized,
            data=data,
            statistics=statistics,
            no_primary_index=no_primary_index,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse one property in the comma-separated list that precedes a schema
        definition (CREATE ... , <prop> , <prop> ... ( <schema> ))."""
        # Skip the separating comma, if present.
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): assumes self._curr is not None here; at end of input this
        # would raise AttributeError — confirm callers guarantee a current token.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/view property, dispatching on PROPERTY_PARSERS,
        then on a few special token pairs, then on a generic key=value form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `<var|string> = <column>` property assignment; peek without
        # advancing so the key token is still available to _parse_var_or_string.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class:
        t.Type[exp.Expression]) -> exp.Expression:
        """Parse `[=|AS] <value>` and wrap the value in *exp_class*."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
        """Collect consecutive properties into an exp.Properties node.

        Args:
            before: if truthy, use the pre-schema ("before") property grammar.

        Returns:
            exp.Properties, or None if no property was parsed.
        """
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.Expression:
        """Parse `[NO] FALLBACK [PROTECTION]`."""
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse a WITH property: either a wrapped property list, a WITH JOURNAL
        TABLE clause, or a WITH ... ISOLATED LOADING clause."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        # Lookahead one token to distinguish the two non-parenthesized forms.
        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse `DEFINER = user@host`; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        """Parse `WITH JOURNAL TABLE = <table>`."""
        self._match_text_seq("WITH", "JOURNAL", "TABLE")
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.Expression:
        """Parse `[NO] LOG`."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no: bool = False, dual: bool = False) -> exp.Expression:
        """Parse `[NO|DUAL] [BEFORE] JOURNAL` (no/dual consumed by the caller)."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no: bool = False, dual: bool = False, local=None) -> exp.Expression:
        """Parse `[NOT] [LOCAL] AFTER JOURNAL` (no/dual/local consumed by caller)."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parse `CHECKSUM [=] (ON|OFF|DEFAULT)`; `on` stays None if unspecified."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        """Parse `FREESPACE [=] <number> [PERCENT]`."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
        """Parse `MERGEBLOCKRATIO [= <number> [PERCENT]]`; without `=` only the
        caller-supplied no/default flags are recorded."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse `[DEFAULT|MIN[IMUM]|MAX[IMUM]] DATABLOCKSIZE [= <n> [units]]`."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse `BLOCKCOMPRESSION [=] (ALWAYS|MANUAL|NEVER|DEFAULT) [AUTOTEMP (...)]`."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match(TokenType.ALWAYS)
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `PARTITION BY <expr>, ...`; returns [] if the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse `[=] (<schema>|<field>)` for a PARTITIONED BY property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_distkey(self) -> exp.Expression:
        """Parse `(<identifier>)` for a DISTKEY property."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse `LIKE <table> [(INCLUDING|EXCLUDING) <option> ...]`."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # Dangling INCLUDING/EXCLUDING without an option name: bail out.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse `(<identifier>, ...)` for a [COMPOUND] SORTKEY property."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse `[=] <charset>` for a [DEFAULT] CHARACTER SET property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: either a scalar type or `TABLE <...>` /
        `TABLE (<schema>)` for table-valued functions."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value,
                               is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        """Parse `DESCRIBE [<kind>] <table>` where kind is a creatable (TABLE, VIEW, ...)."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement body (after the INSERT keyword), covering
        both `INSERT [OVERWRITE] [LOCAL] DIRECTORY ...` and `INSERT INTO <table> ...`."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        if self._match_text_seq("DIRECTORY"):
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
        )

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse `FORMAT ...` (the ROW token was already consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a row format clause: `SERDE '<name>'` or `DELIMITED [FIELDS
        TERMINATED BY ...] [...]`.

        Args:
            match_row: require a leading `ROW FORMAT` token pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional; order here mirrors the grammar.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parse `LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...`."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parse `DELETE [FROM] <table> [USING ...] [WHERE ...]`."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse `UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]`."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]`."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        # Spark-style OPTIONS('key' = 'value') — a single key/value pair.
        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse `PARTITION (<expr>, ...)`; returns None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row: a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statement, plain SELECT,
        parenthesized subquery (when nested/table), or VALUES; the result is
        then fed through set-operation parsing (UNION etc.).

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: whether a parsed subquery may take an alias.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                # raise_error may be a no-op depending on error level, so fall
                # back to returning the bare CTE node.
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse `WITH [RECURSIVE] <cte> [, <cte> ...]`."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Tolerate either a comma or a (redundant) repeated WITH between CTEs.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        """Parse one CTE: `<alias> [AS] (<statement>)`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse `[AS] <alias> [(<col> [, ...])]`; returns None when neither an
        alias nor a column list is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var()))
            # If no columns were parsed, the paren wasn't a column list — rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this:
t.Optional[exp.Expression], parse_alias: bool = True 1622 ) -> exp.Expression: 1623 return self.expression( 1624 exp.Subquery, 1625 this=this, 1626 pivots=self._parse_pivots(), 1627 alias=self._parse_table_alias() if parse_alias else None, 1628 ) 1629 1630 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1631 if not isinstance(this, self.MODIFIABLES): 1632 return 1633 1634 table = isinstance(this, exp.Table) 1635 1636 while True: 1637 lateral = self._parse_lateral() 1638 join = self._parse_join() 1639 comma = None if table else self._match(TokenType.COMMA) 1640 if lateral: 1641 this.append("laterals", lateral) 1642 if join: 1643 this.append("joins", join) 1644 if comma: 1645 this.args["from"].append("expressions", self._parse_table()) 1646 if not (lateral or join or comma): 1647 break 1648 1649 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1650 expression = parser(self) 1651 1652 if expression: 1653 this.set(key, expression) 1654 1655 def _parse_hint(self) -> t.Optional[exp.Expression]: 1656 if self._match(TokenType.HINT): 1657 hints = self._parse_csv(self._parse_function) 1658 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1659 self.raise_error("Expected */ after HINT") 1660 return self.expression(exp.Hint, expressions=hints) 1661 1662 return None 1663 1664 def _parse_into(self) -> t.Optional[exp.Expression]: 1665 if not self._match(TokenType.INTO): 1666 return None 1667 1668 temp = self._match(TokenType.TEMPORARY) 1669 unlogged = self._match(TokenType.UNLOGGED) 1670 self._match(TokenType.TABLE) 1671 1672 return self.expression( 1673 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1674 ) 1675 1676 def _parse_from(self) -> t.Optional[exp.Expression]: 1677 if not self._match(TokenType.FROM): 1678 return None 1679 1680 return self.expression( 1681 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1682 ) 1683 1684 def _parse_match_recognize(self) -> 
t.Optional[exp.Expression]: 1685 if not self._match(TokenType.MATCH_RECOGNIZE): 1686 return None 1687 self._match_l_paren() 1688 1689 partition = self._parse_partition_by() 1690 order = self._parse_order() 1691 measures = ( 1692 self._parse_alias(self._parse_conjunction()) 1693 if self._match_text_seq("MEASURES") 1694 else None 1695 ) 1696 1697 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1698 rows = exp.Var(this="ONE ROW PER MATCH") 1699 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1700 text = "ALL ROWS PER MATCH" 1701 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1702 text += f" SHOW EMPTY MATCHES" 1703 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1704 text += f" OMIT EMPTY MATCHES" 1705 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1706 text += f" WITH UNMATCHED ROWS" 1707 rows = exp.Var(this=text) 1708 else: 1709 rows = None 1710 1711 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1712 text = "AFTER MATCH SKIP" 1713 if self._match_text_seq("PAST", "LAST", "ROW"): 1714 text += f" PAST LAST ROW" 1715 elif self._match_text_seq("TO", "NEXT", "ROW"): 1716 text += f" TO NEXT ROW" 1717 elif self._match_text_seq("TO", "FIRST"): 1718 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1719 elif self._match_text_seq("TO", "LAST"): 1720 text += f" TO LAST {self._advance_any().text}" # type: ignore 1721 after = exp.Var(this=text) 1722 else: 1723 after = None 1724 1725 if self._match_text_seq("PATTERN"): 1726 self._match_l_paren() 1727 1728 if not self._curr: 1729 self.raise_error("Expecting )", self._curr) 1730 1731 paren = 1 1732 start = self._curr 1733 1734 while self._curr and paren > 0: 1735 if self._curr.token_type == TokenType.L_PAREN: 1736 paren += 1 1737 if self._curr.token_type == TokenType.R_PAREN: 1738 paren -= 1 1739 end = self._prev 1740 self._advance() 1741 if paren > 0: 1742 self.raise_error("Expecting )", self._curr) 1743 pattern = exp.Var(this=self._find_sql(start, end)) 1744 else: 1745 
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse a LATERAL / CROSS APPLY / OUTER APPLY construct.

        APPLY variants are returned wrapped in an exp.Join (OUTER APPLY becomes
        a LEFT join); plain LATERAL is returned as an exp.Lateral node.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW syntax: `<table> [AS] <col>, ...`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return (natural, side, kind) tokens for a join prefix, any of which
        may be None."""
        return (
            self._match(TokenType.NATURAL) and
self._prev, 1813 self._match_set(self.JOIN_SIDES) and self._prev, 1814 self._match_set(self.JOIN_KINDS) and self._prev, 1815 ) 1816 1817 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1818 natural, side, kind = self._parse_join_side_and_kind() 1819 1820 if not skip_join_token and not self._match(TokenType.JOIN): 1821 return None 1822 1823 kwargs: t.Dict[ 1824 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1825 ] = {"this": self._parse_table()} 1826 1827 if natural: 1828 kwargs["natural"] = True 1829 if side: 1830 kwargs["side"] = side.text 1831 if kind: 1832 kwargs["kind"] = kind.text 1833 1834 if self._match(TokenType.ON): 1835 kwargs["on"] = self._parse_conjunction() 1836 elif self._match(TokenType.USING): 1837 kwargs["using"] = self._parse_wrapped_id_vars() 1838 1839 return self.expression(exp.Join, **kwargs) # type: ignore 1840 1841 def _parse_index(self) -> exp.Expression: 1842 index = self._parse_id_var() 1843 self._match(TokenType.ON) 1844 self._match(TokenType.TABLE) # hive 1845 1846 return self.expression( 1847 exp.Index, 1848 this=index, 1849 table=self.expression(exp.Table, this=self._parse_id_var()), 1850 columns=self._parse_expression(), 1851 ) 1852 1853 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 1854 unique = self._match(TokenType.UNIQUE) 1855 primary = self._match_text_seq("PRIMARY") 1856 amp = self._match_text_seq("AMP") 1857 if not self._match(TokenType.INDEX): 1858 return None 1859 index = self._parse_id_var() 1860 columns = None 1861 if self._match(TokenType.L_PAREN, advance=False): 1862 columns = self._parse_wrapped_csv(self._parse_column) 1863 return self.expression( 1864 exp.Index, 1865 this=index, 1866 columns=columns, 1867 unique=unique, 1868 primary=primary, 1869 amp=amp, 1870 ) 1871 1872 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 1873 catalog = None 1874 db = None 1875 table = (not schema and self._parse_function()) or 
self._parse_id_var(any_token=False) 1876 1877 while self._match(TokenType.DOT): 1878 if catalog: 1879 # This allows nesting the table in arbitrarily many dot expressions if needed 1880 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 1881 else: 1882 catalog = db 1883 db = table 1884 table = self._parse_id_var() 1885 1886 if not table: 1887 self.raise_error(f"Expected table name but got {self._curr}") 1888 1889 return self.expression( 1890 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 1891 ) 1892 1893 def _parse_table( 1894 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1895 ) -> t.Optional[exp.Expression]: 1896 lateral = self._parse_lateral() 1897 1898 if lateral: 1899 return lateral 1900 1901 unnest = self._parse_unnest() 1902 1903 if unnest: 1904 return unnest 1905 1906 values = self._parse_derived_table_values() 1907 1908 if values: 1909 return values 1910 1911 subquery = self._parse_select(table=True) 1912 1913 if subquery: 1914 return subquery 1915 1916 this = self._parse_table_parts(schema=schema) 1917 1918 if schema: 1919 return self._parse_schema(this=this) 1920 1921 if self.alias_post_tablesample: 1922 table_sample = self._parse_table_sample() 1923 1924 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1925 1926 if alias: 1927 this.set("alias", alias) 1928 1929 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 1930 this.set( 1931 "hints", 1932 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 1933 ) 1934 self._match_r_paren() 1935 1936 if not self.alias_post_tablesample: 1937 table_sample = self._parse_table_sample() 1938 1939 if table_sample: 1940 table_sample.set("this", this) 1941 this = table_sample 1942 1943 return this 1944 1945 def _parse_unnest(self) -> t.Optional[exp.Expression]: 1946 if not self._match(TokenType.UNNEST): 1947 return None 1948 1949 expressions = 
self._parse_wrapped_csv(self._parse_column) 1950 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 1951 alias = self._parse_table_alias() 1952 1953 if alias and self.unnest_column_only: 1954 if alias.args.get("columns"): 1955 self.raise_error("Unexpected extra column alias in unnest.") 1956 alias.set("columns", [alias.this]) 1957 alias.set("this", None) 1958 1959 offset = None 1960 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 1961 self._match(TokenType.ALIAS) 1962 offset = self._parse_conjunction() 1963 1964 return self.expression( 1965 exp.Unnest, 1966 expressions=expressions, 1967 ordinality=ordinality, 1968 alias=alias, 1969 offset=offset, 1970 ) 1971 1972 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 1973 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 1974 if not is_derived and not self._match(TokenType.VALUES): 1975 return None 1976 1977 expressions = self._parse_csv(self._parse_value) 1978 1979 if is_derived: 1980 self._match_r_paren() 1981 1982 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 1983 1984 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 1985 if not self._match(TokenType.TABLE_SAMPLE): 1986 return None 1987 1988 method = self._parse_var() 1989 bucket_numerator = None 1990 bucket_denominator = None 1991 bucket_field = None 1992 percent = None 1993 rows = None 1994 size = None 1995 seed = None 1996 1997 self._match_l_paren() 1998 1999 if self._match(TokenType.BUCKET): 2000 bucket_numerator = self._parse_number() 2001 self._match(TokenType.OUT_OF) 2002 bucket_denominator = bucket_denominator = self._parse_number() 2003 self._match(TokenType.ON) 2004 bucket_field = self._parse_field() 2005 else: 2006 num = self._parse_number() 2007 2008 if self._match(TokenType.PERCENT): 2009 percent = num 2010 elif self._match(TokenType.ROWS): 2011 rows = num 2012 else: 2013 size = num 2014 2015 self._match_r_paren() 2016 
        # Optional SEED(<number>) suffix for deterministic sampling.
        if self._match(TokenType.SEED):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse consecutive PIVOT/UNPIVOT clauses until one fails to parse.

        Uses the two-argument iter() sentinel form: keeps calling _parse_pivot
        until it returns None.
        """
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT or UNPIVOT clause into exp.Pivot, or return None.

        Backtracks (retreats the token cursor) if the clause turns out not to be
        a pivot, e.g. when no opening paren follows the keyword.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT keyword without "(" -- not a pivot clause; rewind.
            self._retreat(index)
            return None

        if unpivot:
            # UNPIVOT lists plain columns; PIVOT lists (optionally aliased) aggregations.
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; `skip_where_token` means WHERE was already consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, CUBE and ROLLUP forms."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        expressions = self._parse_csv(self._parse_conjunction)
        grouping_sets = self._parse_grouping_sets()

        # CUBE/ROLLUP may appear either as "WITH CUBE/ROLLUP" (bare flag) or as
        # "CUBE (cols)" / "ROLLUP (cols)" with an explicit column list.
        self._match(TokenType.COMMA)
        with_ = self._match(TokenType.WITH)
        cube = self._match(TokenType.CUBE) and (
            with_ or self._parse_wrapped_csv(self._parse_column)
        )

        self._match(TokenType.COMMA)
        rollup = self._match(TokenType.ROLLUP) and (
            with_ or self._parse_wrapped_csv(self._parse_column)
        )

        return self.expression(
            exp.Group,
            expressions=expressions,
            grouping_sets=grouping_sets,
            cube=cube,
            rollup=rollup,
        )

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( <set>, ... ), or return None if absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause; `skip_having_token` means HAVING was already consumed."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause (window-function filter), or return None."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause wrapping `this`; pass `this` through if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term: expression [ASC|DESC] [NULLS FIRST|NULLS LAST].

        When nulls ordering is not explicit, derive nulls_first from the dialect's
        null_ordering setting so generated SQL matches the source's semantics.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        # _match returns a truthy marker or None; normalize to plain booleans.
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT n / TOP n (when `top`) / FETCH FIRST|NEXT n ROW(S) ONLY.

        Returns `this` unchanged when no limiting clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # T-SQL allows TOP (n) with parentheses.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] (COMMA form supports MySQL "LIMIT x, y")."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses, or return None."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION / EXCEPT / INTERSECT chains, recursing on the right side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    # The following methods form a classic precedence-climbing expression parser:
    # conjunction -> equality -> comparison -> range -> bitwise -> term -> factor -> unary.

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest binding precedence handled here)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, etc. at equality precedence."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse <, <=, >, >= at comparison precedence."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: [NOT] BETWEEN/IN/LIKE..., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE."""
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of IN: UNNEST(...), a (sub)query/list, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery becomes "query"; anything else is a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse the tail of BETWEEN: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used with LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM set, e.g. + and -)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR set, e.g. * and /)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, or fall through to a typed/column expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals and implicit casts like ``DATE '2020-01-01'``.

        Backtracks when a type token turns out to be a plain column reference.
        """
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if this and not isinstance(this, exp.Star):
                # "<type> <expr>" is an implicit cast of <expr> to <type>.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly parameterized/nested) data type into exp.DataType.

        `check_func` guards against misreading a function call as a type; the
        method backtracks via _retreat whenever the tokens don't form a type.
        """
        index = self._index

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters, e.g. DECIMAL(10, 2) or STRUCT(a INT).
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # "(" without parameters: not a type -- rewind the cursor.
                self._retreat(index)
                return None

            self._match_r_paren()
            # Could still be a function call spelled like a type, e.g. DATE(x).
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was a bracket/index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nesting, e.g. ARRAY<INT> or STRUCT<a INT, b TEXT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            value = self.expression(exp.Interval, unit=self._parse_var())

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so treat this as a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: either a bare type or ``name [:] type``."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix around `this`."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted paths, ::casts and brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # JSON-ish path operators consume the next token as a literal key.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.table:
                # "a.b": promote the previous column name to a table qualifier.
                this = self.expression(exp.Column, this=field, table=this.this)
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals, adjacent-string concat, parens/tuples/subqueries."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL concatenates adjacent string literals: 'a' 'b' == 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # ".5" style float literal with no leading zero.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                this.comments = comments

            return this

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (known, anonymous, no-paren or subquery predicate).

        `functions` optionally overrides the FUNCTIONS registry used for lookup.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only keyword-style functions like CURRENT_DATE apply.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(SELECT ...), ANY(SELECT ...), etc.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: dotted name plus an optional kwarg list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_udf_kwarg)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap a national-character string literal (N'...') in exp.National."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally namespaced (kind.name)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_udf_kwarg(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter: name plus an optional type."""
        this = self._parse_id_var()
        kind = self._parse_types()

        if not kind:
            return this

        return self.expression(exp.UserDefinedFunctionKwarg, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda ((a, b) -> expr) or fall back to a function argument.

        Backtracks if the tentative parameter list is not followed by a lambda
        arrow; also handles DISTINCT, IGNORE/RESPECT NULLS and trailing
        ORDER BY / LIMIT inside aggregate argument lists.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all -- rewind and parse as a plain argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            # "(SELECT ..." is a subquery, not a schema -- rewind.
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name (in `this`), type, then constraints."""
        kind = self._parse_types()

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column-level constraint, or return None if the cursor isn't at one."""
        this = self._parse_references()

        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            # Named constraint: CONSTRAINT <name> <kind>.
            this = self._parse_id_var()

        kind: exp.Expression

        if self._match_set((TokenType.AUTO_INCREMENT, TokenType.IDENTITY)):
            start = None
            increment = None

            if self._match(TokenType.L_PAREN, advance=False):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                start = seq_get(args, 0)
                increment = seq_get(args, 1)
            elif self._match_text_seq("START"):
                start = self._parse_bitwise()
                self._match_text_seq("INCREMENT")
                increment = self._parse_bitwise()

            if start and increment:
                kind = exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
            else:
                kind = exp.AutoIncrementColumnConstraint()
        elif self._match(TokenType.CHECK):
            constraint = self._parse_wrapped(self._parse_conjunction)
            kind = self.expression(exp.CheckColumnConstraint, this=constraint)
        elif self._match(TokenType.COLLATE):
            kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.ENCODE):
            kind = self.expression(exp.EncodeColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.DEFAULT):
            kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_bitwise())
        elif self._match_pair(TokenType.NOT, TokenType.NULL):
            kind = exp.NotNullColumnConstraint()
        elif self._match(TokenType.NULL):
            # An explicit NULL constraint is modeled as NotNull with allow_null=True.
            kind = exp.NotNullColumnConstraint(allow_null=True)
        elif self._match(TokenType.SCHEMA_COMMENT):
            kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string())
        elif self._match(TokenType.PRIMARY_KEY):
            desc = None
            if self._match(TokenType.ASC) or self._match(TokenType.DESC):
                desc = self._prev.token_type == TokenType.DESC
            kind = exp.PrimaryKeyColumnConstraint(desc=desc)
        elif self._match(TokenType.UNIQUE):
            kind = exp.UniqueColumnConstraint()
        elif self._match(TokenType.GENERATED):
            # GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(START WITH n INCREMENT BY m)]
            if self._match(TokenType.BY_DEFAULT):
                kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
            else:
                self._match(TokenType.ALWAYS)
                kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
            self._match_pair(TokenType.ALIAS, TokenType.IDENTITY)

            if self._match(TokenType.L_PAREN):
                if self._match_text_seq("START", "WITH"):
                    kind.set("start", self._parse_bitwise())
                if self._match_text_seq("INCREMENT", "BY"):
                    kind.set("increment", self._parse_bitwise())

                self._match_r_paren()
        else:
            return this

        return self.expression(exp.ColumnConstraint, this=this, kind=kind)

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse CONSTRAINT <name> followed by one or more constraint bodies."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint()

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(self) -> t.Optional[exp.Expression]:
        """Dispatch to a registered table-constraint parser, or return None."""
        if not self._match_set(self.CONSTRAINT_PARSERS):
            return None
        return self.CONSTRAINT_PARSERS[self._prev.token_type](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse a UNIQUE (col, ...) table constraint."""
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings.

        Handles ON <event> <action>, NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED,
        NORELY and MATCH FULL; stops at the first unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] plus trailing key options."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

kind = self._prev.text.lower() 2894 2895 if self._match(TokenType.NO_ACTION): 2896 action = "NO ACTION" 2897 elif self._match(TokenType.SET): 2898 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 2899 action = "SET " + self._prev.text.upper() 2900 else: 2901 self._advance() 2902 action = self._prev.text.upper() 2903 2904 options[kind] = action 2905 2906 return self.expression( 2907 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 2908 ) 2909 2910 def _parse_primary_key(self) -> exp.Expression: 2911 expressions = self._parse_wrapped_id_vars() 2912 options = self._parse_key_constraint_options() 2913 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 2914 2915 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2916 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 2917 return this 2918 2919 bracket_kind = self._prev.token_type 2920 expressions: t.List[t.Optional[exp.Expression]] 2921 2922 if self._match(TokenType.COLON): 2923 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 2924 else: 2925 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 2926 2927 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 2928 if bracket_kind == TokenType.L_BRACE: 2929 this = self.expression(exp.Struct, expressions=expressions) 2930 elif not this or this.name.upper() == "ARRAY": 2931 this = self.expression(exp.Array, expressions=expressions) 2932 else: 2933 expressions = apply_index_offset(expressions, -self.index_offset) 2934 this = self.expression(exp.Bracket, this=this, expressions=expressions) 2935 2936 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 2937 self.raise_error("Expected ]") 2938 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 2939 self.raise_error("Expected }") 2940 2941 this.comments = 
self._prev_comments 2942 return self._parse_bracket(this) 2943 2944 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2945 if self._match(TokenType.COLON): 2946 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 2947 return this 2948 2949 def _parse_case(self) -> t.Optional[exp.Expression]: 2950 ifs = [] 2951 default = None 2952 2953 expression = self._parse_conjunction() 2954 2955 while self._match(TokenType.WHEN): 2956 this = self._parse_conjunction() 2957 self._match(TokenType.THEN) 2958 then = self._parse_conjunction() 2959 ifs.append(self.expression(exp.If, this=this, true=then)) 2960 2961 if self._match(TokenType.ELSE): 2962 default = self._parse_conjunction() 2963 2964 if not self._match(TokenType.END): 2965 self.raise_error("Expected END after CASE", self._prev) 2966 2967 return self._parse_window( 2968 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 2969 ) 2970 2971 def _parse_if(self) -> t.Optional[exp.Expression]: 2972 if self._match(TokenType.L_PAREN): 2973 args = self._parse_csv(self._parse_conjunction) 2974 this = exp.If.from_arg_list(args) 2975 self.validate_expression(this, args) 2976 self._match_r_paren() 2977 else: 2978 condition = self._parse_conjunction() 2979 self._match(TokenType.THEN) 2980 true = self._parse_conjunction() 2981 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 2982 self._match(TokenType.END) 2983 this = self.expression(exp.If, this=condition, true=true, false=false) 2984 2985 return self._parse_window(this) 2986 2987 def _parse_extract(self) -> exp.Expression: 2988 this = self._parse_function() or self._parse_var() or self._parse_type() 2989 2990 if self._match(TokenType.FROM): 2991 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 2992 2993 if not self._match(TokenType.COMMA): 2994 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 2995 2996 return 
self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 2997 2998 def _parse_cast(self, strict: bool) -> exp.Expression: 2999 this = self._parse_conjunction() 3000 3001 if not self._match(TokenType.ALIAS): 3002 self.raise_error("Expected AS after CAST") 3003 3004 to = self._parse_types() 3005 3006 if not to: 3007 self.raise_error("Expected TYPE after CAST") 3008 elif to.this == exp.DataType.Type.CHAR: 3009 if self._match(TokenType.CHARACTER_SET): 3010 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3011 3012 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3013 3014 def _parse_string_agg(self) -> exp.Expression: 3015 expression: t.Optional[exp.Expression] 3016 3017 if self._match(TokenType.DISTINCT): 3018 args = self._parse_csv(self._parse_conjunction) 3019 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3020 else: 3021 args = self._parse_csv(self._parse_conjunction) 3022 expression = seq_get(args, 0) 3023 3024 index = self._index 3025 if not self._match(TokenType.R_PAREN): 3026 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3027 order = self._parse_order(this=expression) 3028 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3029 3030 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3031 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3032 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
        if not self._match(TokenType.WITHIN_GROUP):
            # No WITHIN GROUP: rewind past the consumed ")" and build a plain
            # GROUP_CONCAT from the positional arguments.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type); both become casts.
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        # POSITION(needle IN haystack) or the comma-argument variant;
        # haystack_first flips the comma-form argument order (dialect-dependent).
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        # Hint-style pseudo-function, e.g. BROADCAST(t1, t2).
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        # TRIM([LEADING|TRAILING|BOTH] [chars FROM] string [COLLATE c]).

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-operand form: the first term is the character set to trim.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # WINDOW w AS (...) [, ...] clause at the end of a SELECT.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` with FILTER / WITHIN GROUP / IGNORE|RESPECT NULLS /
        # OVER (...) modifiers as they appear; alias=True parses the named
        # window form "w AS (...)".
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
3139 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3140 if self._match(TokenType.WITHIN_GROUP): 3141 order = self._parse_wrapped(self._parse_order) 3142 this = self.expression(exp.WithinGroup, this=this, expression=order) 3143 3144 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3145 # Some dialects choose to implement and some do not. 3146 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3147 3148 # There is some code above in _parse_lambda that handles 3149 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3150 3151 # The below changes handle 3152 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3153 3154 # Oracle allows both formats 3155 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3156 # and Snowflake chose to do the same for familiarity 3157 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3158 if self._match(TokenType.IGNORE_NULLS): 3159 this = self.expression(exp.IgnoreNulls, this=this) 3160 elif self._match(TokenType.RESPECT_NULLS): 3161 this = self.expression(exp.RespectNulls, this=this) 3162 3163 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named-window form (WINDOW x AS (...)): consume the optional AS.
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            # No OVER clause: not a window invocation; return what we have.
            return this

        if not self._match(TokenType.L_PAREN):
            # "OVER w" — a reference to a named window, no parenthesized spec.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] <spec> [AND <spec>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One side of a frame clause: UNBOUNDED/CURRENT ROW keyword or an
        # expression, optionally followed by PRECEDING/FOLLOWING.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # [AS] alias or [AS] (a, b, ...); explicit=True requires the AS keyword.
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Identifier-or-soft-keyword; a real (quoted) identifier wins outright.
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume any non-reserved token and return it, else None.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Parser declined (e.g. ":" not followed by a name): rewind.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # Star modifier: EXCEPT (col, ...); the column list may be bare.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # Star modifier: REPLACE (expr AS col, ...); the list may be bare.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        # Separator-delimited list; comments trailing a separator are attached
        # to the item that precedes it.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over the binary operators in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        # Parse "( <something> )", raising if either paren is missing.
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT body of a DDL statement (e.g. CREATE TABLE ... AS <select>).
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        # BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode ...]].
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens (e.g. READ ONLY).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        # COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN].
        # The triggering token was consumed by the caller, hence self._prev.
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def>.
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        # ADD [CONSTRAINT name] {CHECK (...) [ENFORCED] | FOREIGN KEY ... | PRIMARY KEY ...}.
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

        if self._match(TokenType.CHECK):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE [IF EXISTS] <table> <action [, ...]>; anything other than
        # ALTER TABLE falls back to an opaque Command node.
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind so _parse_add_column re-reads "ADD".
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition drop: rewind so _parse_drop_column re-reads "DROP".
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                # ALTER COLUMN c [SET DATA] TYPE t [COLLATE c] [USING expr].
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dispatch to a dialect-registered SHOW sub-parser when its keyword
        # sequence matches; otherwise produce a generic Show node.
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        # Fallback for SET items with no registered sub-parser.
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        # MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN <action> [...].
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # Condition text between WHEN and THEN (e.g. MATCHED / NOT MATCHED).
            this = self._parse_conjunction()
            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            # NOTE(review): `then` is unbound if none of the branches match —
            # presumably unreachable for well-formed MERGE input; confirm.

            whens.append(self.expression(exp.When, this=this, then=then))

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_set(self) -> exp.Expression:
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume the rest of the statement verbatim into an opaque Command.
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Walk the keyword trie over the upcoming tokens; on a complete match
        # return the registered sub-parser, otherwise rewind and return None.
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True):
        # True when the current token matches; advances unless advance=False.
        # Returns None (not False) on a miss.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types):
        # Like _match but against a collection of token types; always advances.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; advances past both unless
        # advance=False.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        # Require "("; attach any comments preceding it to `expression`.
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_r_paren(self, expression=None):
        # Require ")"; attach any comments preceding it to `expression`.
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_texts(self, texts):
        # Case-insensitive single-token text match.
        if self._curr and self._curr.text.upper() in texts:
            self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitive multi-token text-sequence match; rewinds fully on a
        # partial match (and, on success, when advance=False).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        # Normalize Column/Identifier nodes into Dot/Var chains (used where a
        # column reference actually denotes a dotted path).
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        # Rewrite references to lambda parameters as their bare identifiers.
        if isinstance(node, exp.Column):
            if node.name in lambda_variables:
                return node.this
        return node
42class Parser(metaclass=_Parser): 43 """ 44 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 45 a parsed syntax tree. 46 47 Args: 48 error_level: the desired error level. 49 Default: ErrorLevel.RAISE 50 error_message_context: determines the amount of context to capture from a 51 query string when displaying the error message (in number of characters). 52 Default: 50. 53 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 54 Default: 0 55 alias_post_tablesample: If the table alias comes after tablesample. 56 Default: False 57 max_errors: Maximum number of error messages to include in a raised ParseError. 58 This is only relevant if error_level is ErrorLevel.RAISE. 59 Default: 3 60 null_ordering: Indicates the default null ordering method to use if not explicitly set. 61 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 62 Default: "nulls_are_small" 63 """ 64 65 FUNCTIONS: t.Dict[str, t.Callable] = { 66 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 67 "DATE_TO_DATE_STR": lambda args: exp.Cast( 68 this=seq_get(args, 0), 69 to=exp.DataType(this=exp.DataType.Type.TEXT), 70 ), 71 "TIME_TO_TIME_STR": lambda args: exp.Cast( 72 this=seq_get(args, 0), 73 to=exp.DataType(this=exp.DataType.Type.TEXT), 74 ), 75 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 76 this=exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 start=exp.Literal.number(1), 81 length=exp.Literal.number(10), 82 ), 83 "VAR_MAP": parse_var_map, 84 "IFNULL": exp.Coalesce.from_arg_list, 85 } 86 87 NO_PAREN_FUNCTIONS = { 88 TokenType.CURRENT_DATE: exp.CurrentDate, 89 TokenType.CURRENT_DATETIME: exp.CurrentDate, 90 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 91 } 92 93 NESTED_TYPE_TOKENS = { 94 TokenType.ARRAY, 95 TokenType.MAP, 96 TokenType.STRUCT, 97 TokenType.NULLABLE, 98 } 99 100 TYPE_TOKENS = { 101 TokenType.BOOLEAN, 102 
TokenType.TINYINT, 103 TokenType.SMALLINT, 104 TokenType.INT, 105 TokenType.BIGINT, 106 TokenType.FLOAT, 107 TokenType.DOUBLE, 108 TokenType.CHAR, 109 TokenType.NCHAR, 110 TokenType.VARCHAR, 111 TokenType.NVARCHAR, 112 TokenType.TEXT, 113 TokenType.MEDIUMTEXT, 114 TokenType.LONGTEXT, 115 TokenType.MEDIUMBLOB, 116 TokenType.LONGBLOB, 117 TokenType.BINARY, 118 TokenType.VARBINARY, 119 TokenType.JSON, 120 TokenType.JSONB, 121 TokenType.INTERVAL, 122 TokenType.TIME, 123 TokenType.TIMESTAMP, 124 TokenType.TIMESTAMPTZ, 125 TokenType.TIMESTAMPLTZ, 126 TokenType.DATETIME, 127 TokenType.DATE, 128 TokenType.DECIMAL, 129 TokenType.UUID, 130 TokenType.GEOGRAPHY, 131 TokenType.GEOMETRY, 132 TokenType.HLLSKETCH, 133 TokenType.HSTORE, 134 TokenType.PSEUDO_TYPE, 135 TokenType.SUPER, 136 TokenType.SERIAL, 137 TokenType.SMALLSERIAL, 138 TokenType.BIGSERIAL, 139 TokenType.XML, 140 TokenType.UNIQUEIDENTIFIER, 141 TokenType.MONEY, 142 TokenType.SMALLMONEY, 143 TokenType.ROWVERSION, 144 TokenType.IMAGE, 145 TokenType.VARIANT, 146 TokenType.OBJECT, 147 *NESTED_TYPE_TOKENS, 148 } 149 150 SUBQUERY_PREDICATES = { 151 TokenType.ANY: exp.Any, 152 TokenType.ALL: exp.All, 153 TokenType.EXISTS: exp.Exists, 154 TokenType.SOME: exp.Any, 155 } 156 157 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 158 159 ID_VAR_TOKENS = { 160 TokenType.VAR, 161 TokenType.ALWAYS, 162 TokenType.ANTI, 163 TokenType.APPLY, 164 TokenType.AUTO_INCREMENT, 165 TokenType.BEGIN, 166 TokenType.BOTH, 167 TokenType.BUCKET, 168 TokenType.CACHE, 169 TokenType.CASCADE, 170 TokenType.COLLATE, 171 TokenType.COLUMN, 172 TokenType.COMMAND, 173 TokenType.COMMIT, 174 TokenType.COMPOUND, 175 TokenType.CONSTRAINT, 176 TokenType.CURRENT_TIME, 177 TokenType.DEFAULT, 178 TokenType.DELETE, 179 TokenType.DESCRIBE, 180 TokenType.DIV, 181 TokenType.END, 182 TokenType.EXECUTE, 183 TokenType.ESCAPE, 184 TokenType.FALSE, 185 TokenType.FIRST, 186 TokenType.FILTER, 187 TokenType.FOLLOWING, 188 TokenType.FORMAT, 189 
TokenType.FUNCTION, 190 TokenType.GENERATED, 191 TokenType.IDENTITY, 192 TokenType.IF, 193 TokenType.INDEX, 194 TokenType.ISNULL, 195 TokenType.INTERVAL, 196 TokenType.LAZY, 197 TokenType.LEADING, 198 TokenType.LEFT, 199 TokenType.LOCAL, 200 TokenType.MATERIALIZED, 201 TokenType.MERGE, 202 TokenType.NATURAL, 203 TokenType.NEXT, 204 TokenType.OFFSET, 205 TokenType.ONLY, 206 TokenType.OPTIONS, 207 TokenType.ORDINALITY, 208 TokenType.PERCENT, 209 TokenType.PIVOT, 210 TokenType.PRECEDING, 211 TokenType.RANGE, 212 TokenType.REFERENCES, 213 TokenType.RIGHT, 214 TokenType.ROW, 215 TokenType.ROWS, 216 TokenType.SCHEMA, 217 TokenType.SCHEMA_COMMENT, 218 TokenType.SEED, 219 TokenType.SEMI, 220 TokenType.SET, 221 TokenType.SHOW, 222 TokenType.SORTKEY, 223 TokenType.TABLE, 224 TokenType.TEMPORARY, 225 TokenType.TOP, 226 TokenType.TRAILING, 227 TokenType.TRUE, 228 TokenType.UNBOUNDED, 229 TokenType.UNIQUE, 230 TokenType.UNLOGGED, 231 TokenType.UNPIVOT, 232 TokenType.PROCEDURE, 233 TokenType.VIEW, 234 TokenType.VOLATILE, 235 TokenType.WINDOW, 236 *SUBQUERY_PREDICATES, 237 *TYPE_TOKENS, 238 *NO_PAREN_FUNCTIONS, 239 } 240 241 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 242 TokenType.APPLY, 243 TokenType.LEFT, 244 TokenType.NATURAL, 245 TokenType.OFFSET, 246 TokenType.RIGHT, 247 TokenType.WINDOW, 248 } 249 250 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 251 252 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 253 254 FUNC_TOKENS = { 255 TokenType.COMMAND, 256 TokenType.CURRENT_DATE, 257 TokenType.CURRENT_DATETIME, 258 TokenType.CURRENT_TIMESTAMP, 259 TokenType.CURRENT_TIME, 260 TokenType.FILTER, 261 TokenType.FIRST, 262 TokenType.FORMAT, 263 TokenType.IDENTIFIER, 264 TokenType.INDEX, 265 TokenType.ISNULL, 266 TokenType.ILIKE, 267 TokenType.LIKE, 268 TokenType.MERGE, 269 TokenType.OFFSET, 270 TokenType.PRIMARY_KEY, 271 TokenType.REPLACE, 272 TokenType.ROW, 273 TokenType.UNNEST, 274 TokenType.VAR, 275 TokenType.LEFT, 276 TokenType.RIGHT, 277 
TokenType.DATE, 278 TokenType.DATETIME, 279 TokenType.TABLE, 280 TokenType.TIMESTAMP, 281 TokenType.TIMESTAMPTZ, 282 TokenType.WINDOW, 283 *TYPE_TOKENS, 284 *SUBQUERY_PREDICATES, 285 } 286 287 CONJUNCTION = { 288 TokenType.AND: exp.And, 289 TokenType.OR: exp.Or, 290 } 291 292 EQUALITY = { 293 TokenType.EQ: exp.EQ, 294 TokenType.NEQ: exp.NEQ, 295 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 296 } 297 298 COMPARISON = { 299 TokenType.GT: exp.GT, 300 TokenType.GTE: exp.GTE, 301 TokenType.LT: exp.LT, 302 TokenType.LTE: exp.LTE, 303 } 304 305 BITWISE = { 306 TokenType.AMP: exp.BitwiseAnd, 307 TokenType.CARET: exp.BitwiseXor, 308 TokenType.PIPE: exp.BitwiseOr, 309 TokenType.DPIPE: exp.DPipe, 310 } 311 312 TERM = { 313 TokenType.DASH: exp.Sub, 314 TokenType.PLUS: exp.Add, 315 TokenType.MOD: exp.Mod, 316 TokenType.COLLATE: exp.Collate, 317 } 318 319 FACTOR = { 320 TokenType.DIV: exp.IntDiv, 321 TokenType.LR_ARROW: exp.Distance, 322 TokenType.SLASH: exp.Div, 323 TokenType.STAR: exp.Mul, 324 } 325 326 TIMESTAMPS = { 327 TokenType.TIME, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMPTZ, 330 TokenType.TIMESTAMPLTZ, 331 } 332 333 SET_OPERATIONS = { 334 TokenType.UNION, 335 TokenType.INTERSECT, 336 TokenType.EXCEPT, 337 } 338 339 JOIN_SIDES = { 340 TokenType.LEFT, 341 TokenType.RIGHT, 342 TokenType.FULL, 343 } 344 345 JOIN_KINDS = { 346 TokenType.INNER, 347 TokenType.OUTER, 348 TokenType.CROSS, 349 TokenType.SEMI, 350 TokenType.ANTI, 351 } 352 353 LAMBDAS = { 354 TokenType.ARROW: lambda self, expressions: self.expression( 355 exp.Lambda, 356 this=self._parse_conjunction().transform( 357 self._replace_lambda, {node.name for node in expressions} 358 ), 359 expressions=expressions, 360 ), 361 TokenType.FARROW: lambda self, expressions: self.expression( 362 exp.Kwarg, 363 this=exp.Var(this=expressions[0].name), 364 expression=self._parse_conjunction(), 365 ), 366 } 367 368 COLUMN_OPERATORS = { 369 TokenType.DOT: None, 370 TokenType.DCOLON: lambda self, this, to: self.expression( 371 
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps target Expression types to the parser method that produces them.
    # Used by `parse_into` to parse a token stream directly into one node kind.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Dispatch for top-level statements, keyed by the statement's leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for primary (literal/atomic) expressions; each receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter / placeholder parsers (?, @param, :name style).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self.expression(
            exp.Parameter, this=self._parse_var() or self._parse_primary()
        ),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Parsers for range/comparison predicates; each receives the left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # DDL property parsers, keyed by the uppercased keyword text (see `_parse_property`).
    PROPERTY_PARSERS = {
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "LIKE": lambda self: self._parse_create_like(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "STABLE":
        lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        # The Teradata-style parsers below inspect `self._prev` because the
        # NO/DUAL/DEFAULT modifier token is consumed before dispatching here.
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "FREESPACE": lambda self: self._parse_freespace(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "DEFINER": lambda self: self._parse_definer(),
    }

    # Column-constraint parsers, keyed by the constraint's leading token.
    CONSTRAINT_PARSERS = {
        TokenType.CHECK: lambda self: self.expression(
            exp.Check, this=self._parse_wrapped(self._parse_conjunction)
        ),
        TokenType.FOREIGN_KEY: lambda self: self._parse_foreign_key(),
        TokenType.UNIQUE: lambda self: self._parse_unique(),
        TokenType.LIKE: lambda self: self._parse_create_like(),
    }

    # Function-like constructs that are not followed by parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
    }

    # Functions whose argument lists need bespoke parsing (non-CSV argument syntax).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # Query-modifier clause parsers; keys match the arg names on query expressions.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
    }

    # Populated by dialects; tries over these are built by the `_Parser` metaclass.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Object kinds valid after CREATE/DROP/DESCRIBE.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST should be strict by default; dialects may override.
    STRICT_CAST = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # See the class docstring for the meaning of each option.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clears all per-parse state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # None of the candidate types parsed; surface all accumulated errors.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons and runs `parse_method` on each chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if
            self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach any comments captured from the previously consumed token; an
        # explicit `comments` argument takes precedence.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL spanned by the two tokens (inclusive).
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Converts the token's (line, col) position into a character offset into `self.sql`.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward, refreshing the _curr/_next/_prev token views.
        # `_curr` and `_next` become None when the cursor runs past the stream.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to an absolute index (used for backtracking).
        self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback: wraps an unparsed statement as a raw Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if
self._match_set(self.STATEMENT_PARSERS): 903 return self.STATEMENT_PARSERS[self._prev.token_type](self) 904 905 if self._match_set(Tokenizer.COMMANDS): 906 return self._parse_command() 907 908 expression = self._parse_expression() 909 expression = self._parse_set_operations(expression) if expression else self._parse_select() 910 911 self._parse_query_modifiers(expression) 912 return expression 913 914 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 915 start = self._prev 916 temporary = self._match(TokenType.TEMPORARY) 917 materialized = self._match(TokenType.MATERIALIZED) 918 kind = self._match_set(self.CREATABLES) and self._prev.text 919 if not kind: 920 if default_kind: 921 kind = default_kind 922 else: 923 return self._parse_as_command(start) 924 925 return self.expression( 926 exp.Drop, 927 exists=self._parse_exists(), 928 this=self._parse_table(schema=True), 929 kind=kind, 930 temporary=temporary, 931 materialized=materialized, 932 cascade=self._match(TokenType.CASCADE), 933 ) 934 935 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 936 return ( 937 self._match(TokenType.IF) 938 and (not not_ or self._match(TokenType.NOT)) 939 and self._match(TokenType.EXISTS) 940 ) 941 942 def _parse_create(self) -> t.Optional[exp.Expression]: 943 start = self._prev 944 replace = self._match_pair(TokenType.OR, TokenType.REPLACE) 945 set_ = self._match(TokenType.SET) # Teradata 946 multiset = self._match_text_seq("MULTISET") # Teradata 947 global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY") # Teradata 948 volatile = self._match(TokenType.VOLATILE) # Teradata 949 temporary = self._match(TokenType.TEMPORARY) 950 transient = self._match_text_seq("TRANSIENT") 951 external = self._match_text_seq("EXTERNAL") 952 unique = self._match(TokenType.UNIQUE) 953 materialized = self._match(TokenType.MATERIALIZED) 954 955 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 956 
self._match(TokenType.TABLE) 957 958 properties = None 959 create_token = self._match_set(self.CREATABLES) and self._prev 960 961 if not create_token: 962 properties = self._parse_properties() 963 create_token = self._match_set(self.CREATABLES) and self._prev 964 965 if not properties or not create_token: 966 return self._parse_as_command(start) 967 968 exists = self._parse_exists(not_=True) 969 this = None 970 expression = None 971 data = None 972 statistics = None 973 no_primary_index = None 974 indexes = None 975 no_schema_binding = None 976 begin = None 977 978 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 979 this = self._parse_user_defined_function(kind=create_token.token_type) 980 properties = self._parse_properties() 981 982 self._match(TokenType.ALIAS) 983 begin = self._match(TokenType.BEGIN) 984 return_ = self._match_text_seq("RETURN") 985 expression = self._parse_statement() 986 987 if return_: 988 expression = self.expression(exp.Return, this=expression) 989 elif create_token.token_type == TokenType.INDEX: 990 this = self._parse_index() 991 elif create_token.token_type in ( 992 TokenType.TABLE, 993 TokenType.VIEW, 994 TokenType.SCHEMA, 995 ): 996 table_parts = self._parse_table_parts(schema=True) 997 998 if self._match(TokenType.COMMA): # comma-separated properties before schema definition 999 properties = self._parse_properties(before=True) 1000 1001 this = self._parse_schema(this=table_parts) 1002 1003 if not properties: # properties after schema definition 1004 properties = self._parse_properties() 1005 1006 self._match(TokenType.ALIAS) 1007 expression = self._parse_ddl_select() 1008 1009 if create_token.token_type == TokenType.TABLE: 1010 if self._match_text_seq("WITH", "DATA"): 1011 data = True 1012 elif self._match_text_seq("WITH", "NO", "DATA"): 1013 data = False 1014 1015 if self._match_text_seq("AND", "STATISTICS"): 1016 statistics = True 1017 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1018 statistics = 
False 1019 1020 no_primary_index = self._match_text_seq("NO", "PRIMARY", "INDEX") 1021 1022 indexes = [] 1023 while True: 1024 index = self._parse_create_table_index() 1025 1026 # post index PARTITION BY property 1027 if self._match(TokenType.PARTITION_BY, advance=False): 1028 if properties: 1029 properties.expressions.append(self._parse_property()) 1030 else: 1031 properties = self._parse_properties() 1032 1033 if not index: 1034 break 1035 else: 1036 indexes.append(index) 1037 elif create_token.token_type == TokenType.VIEW: 1038 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1039 no_schema_binding = True 1040 1041 return self.expression( 1042 exp.Create, 1043 this=this, 1044 kind=create_token.text, 1045 expression=expression, 1046 set=set_, 1047 multiset=multiset, 1048 global_temporary=global_temporary, 1049 volatile=volatile, 1050 exists=exists, 1051 properties=properties, 1052 temporary=temporary, 1053 transient=transient, 1054 external=external, 1055 replace=replace, 1056 unique=unique, 1057 materialized=materialized, 1058 data=data, 1059 statistics=statistics, 1060 no_primary_index=no_primary_index, 1061 indexes=indexes, 1062 no_schema_binding=no_schema_binding, 1063 begin=begin, 1064 ) 1065 1066 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1067 self._match(TokenType.COMMA) 1068 1069 # parsers look to _prev for no/dual/default, so need to consume first 1070 self._match_text_seq("NO") 1071 self._match_text_seq("DUAL") 1072 self._match_text_seq("DEFAULT") 1073 1074 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1075 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1076 1077 return None 1078 1079 def _parse_property(self) -> t.Optional[exp.Expression]: 1080 if self._match_texts(self.PROPERTY_PARSERS): 1081 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1082 1083 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1084 return self._parse_character_set(True) 1085 1086 if 
        self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property (key is a var or a string literal).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Consumes an optional `=` or alias keyword, then the property value.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Parses consecutive properties into a single Properties node, or None."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse step may yield one property or a list of them.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # Teradata: [NO] FALLBACK [PROTECTION]
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if not self._next:
            return None

        if self._next.text.upper() == "JOURNAL":
            return self._parse_withjournaltable()

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        # Teradata: WITH JOURNAL TABLE = <table>
        self._match_text_seq("WITH", "JOURNAL", "TABLE")
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        # Teradata: [NO] LOG
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # Teradata: [NO | DUAL] [BEFORE] JOURNAL
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # Teradata: [NO | DUAL | [NOT] LOCAL] AFTER JOURNAL
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # Teradata: CHECKSUM = {ON | OFF | DEFAULT}
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # Teradata: FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # Teradata: [NO | DEFAULT] MERGEBLOCKRATIO [= <number> [PERCENT]]
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # Teradata: [DEFAULT | MINIMUM | MAXIMUM] DATABLOCKSIZE [= <size> [units]]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        # Teradata: BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP(...)]
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match(TokenType.ALWAYS)
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # Teradata: WITH [NO | CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        self._match(TokenType.WITH)
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_distkey(self) -> exp.Expression:
        # Redshift: DISTKEY(<column>)
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        # CREATE TABLE ... (LIKE <table> [INCLUDING | EXCLUDING <option>] ...)
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        # Redshift: [COMPOUND] SORTKEY(<columns>)
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        # RETURNS <type> | RETURNS TABLE [<schema>] (for user-defined functions)
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        # INSERT [OVERWRITE] [LOCAL DIRECTORY <path> | [INTO] [TABLE] <table>] ...
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)

        this: t.Optional[exp.Expression]

        if self._match_text_seq("DIRECTORY"):
            this = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            overwrite=overwrite,
        )

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # Hive: ROW FORMAT SERDE '<class>' | ROW FORMAT DELIMITED <options>
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None
1385 if self._match_text_seq("SERDE"): 1386 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1387 1388 self._match_text_seq("DELIMITED") 1389 1390 kwargs = {} 1391 1392 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1393 kwargs["fields"] = self._parse_string() 1394 if self._match_text_seq("ESCAPED", "BY"): 1395 kwargs["escaped"] = self._parse_string() 1396 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1397 kwargs["collection_items"] = self._parse_string() 1398 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1399 kwargs["map_keys"] = self._parse_string() 1400 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1401 kwargs["lines"] = self._parse_string() 1402 if self._match_text_seq("NULL", "DEFINED", "AS"): 1403 kwargs["null"] = self._parse_string() 1404 1405 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1406 1407 def _parse_load_data(self) -> exp.Expression: 1408 local = self._match(TokenType.LOCAL) 1409 self._match_text_seq("INPATH") 1410 inpath = self._parse_string() 1411 overwrite = self._match(TokenType.OVERWRITE) 1412 self._match_pair(TokenType.INTO, TokenType.TABLE) 1413 1414 return self.expression( 1415 exp.LoadData, 1416 this=self._parse_table(schema=True), 1417 local=local, 1418 overwrite=overwrite, 1419 inpath=inpath, 1420 partition=self._parse_partition(), 1421 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1422 serde=self._match_text_seq("SERDE") and self._parse_string(), 1423 ) 1424 1425 def _parse_delete(self) -> exp.Expression: 1426 self._match(TokenType.FROM) 1427 1428 return self.expression( 1429 exp.Delete, 1430 this=self._parse_table(schema=True), 1431 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1432 where=self._parse_where(), 1433 ) 1434 1435 def _parse_update(self) -> exp.Expression: 1436 return self.expression( 1437 exp.Update, 1438 **{ # type: ignore 1439 "this": 
self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1440 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1441 "from": self._parse_from(), 1442 "where": self._parse_where(), 1443 }, 1444 ) 1445 1446 def _parse_uncache(self) -> exp.Expression: 1447 if not self._match(TokenType.TABLE): 1448 self.raise_error("Expecting TABLE after UNCACHE") 1449 1450 return self.expression( 1451 exp.Uncache, 1452 exists=self._parse_exists(), 1453 this=self._parse_table(schema=True), 1454 ) 1455 1456 def _parse_cache(self) -> exp.Expression: 1457 lazy = self._match(TokenType.LAZY) 1458 self._match(TokenType.TABLE) 1459 table = self._parse_table(schema=True) 1460 options = [] 1461 1462 if self._match(TokenType.OPTIONS): 1463 self._match_l_paren() 1464 k = self._parse_string() 1465 self._match(TokenType.EQ) 1466 v = self._parse_string() 1467 options = [k, v] 1468 self._match_r_paren() 1469 1470 self._match(TokenType.ALIAS) 1471 return self.expression( 1472 exp.Cache, 1473 this=table, 1474 lazy=lazy, 1475 options=options, 1476 expression=self._parse_select(nested=True), 1477 ) 1478 1479 def _parse_partition(self) -> t.Optional[exp.Expression]: 1480 if not self._match(TokenType.PARTITION): 1481 return None 1482 1483 return self.expression( 1484 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1485 ) 1486 1487 def _parse_value(self) -> exp.Expression: 1488 if self._match(TokenType.L_PAREN): 1489 expressions = self._parse_csv(self._parse_conjunction) 1490 self._match_r_paren() 1491 return self.expression(exp.Tuple, expressions=expressions) 1492 1493 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1494 # Source: https://prestodb.io/docs/current/sql/values.html 1495 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1496 1497 def _parse_select( 1498 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1499 ) -> t.Optional[exp.Expression]: 1500 cte = self._parse_with() 1501 if cte: 1502 this = self._parse_statement() 1503 1504 if not this: 1505 self.raise_error("Failed to parse any statement following CTE") 1506 return cte 1507 1508 if "with" in this.arg_types: 1509 this.set("with", cte) 1510 else: 1511 self.raise_error(f"{this.key} does not support CTE") 1512 this = cte 1513 elif self._match(TokenType.SELECT): 1514 comments = self._prev_comments 1515 1516 hint = self._parse_hint() 1517 all_ = self._match(TokenType.ALL) 1518 distinct = self._match(TokenType.DISTINCT) 1519 1520 if distinct: 1521 distinct = self.expression( 1522 exp.Distinct, 1523 on=self._parse_value() if self._match(TokenType.ON) else None, 1524 ) 1525 1526 if all_ and distinct: 1527 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1528 1529 limit = self._parse_limit(top=True) 1530 expressions = self._parse_csv(self._parse_expression) 1531 1532 this = self.expression( 1533 exp.Select, 1534 hint=hint, 1535 distinct=distinct, 1536 expressions=expressions, 1537 limit=limit, 1538 ) 1539 this.comments = comments 1540 1541 into = self._parse_into() 1542 if into: 1543 this.set("into", into) 1544 1545 from_ = self._parse_from() 1546 if from_: 1547 this.set("from", from_) 1548 1549 self._parse_query_modifiers(this) 1550 elif (table or nested) and self._match(TokenType.L_PAREN): 1551 this = self._parse_table() if table else self._parse_select(nested=True) 1552 self._parse_query_modifiers(this) 1553 this = self._parse_set_operations(this) 1554 self._match_r_paren() 1555 1556 # early return so that subquery unions aren't parsed again 1557 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1558 # Union ALL should be a property of the top 
select node, not the subquery 1559 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1560 elif self._match(TokenType.VALUES): 1561 this = self.expression( 1562 exp.Values, 1563 expressions=self._parse_csv(self._parse_value), 1564 alias=self._parse_table_alias(), 1565 ) 1566 else: 1567 this = None 1568 1569 return self._parse_set_operations(this) 1570 1571 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1572 if not skip_with_token and not self._match(TokenType.WITH): 1573 return None 1574 1575 recursive = self._match(TokenType.RECURSIVE) 1576 1577 expressions = [] 1578 while True: 1579 expressions.append(self._parse_cte()) 1580 1581 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1582 break 1583 else: 1584 self._match(TokenType.WITH) 1585 1586 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1587 1588 def _parse_cte(self) -> exp.Expression: 1589 alias = self._parse_table_alias() 1590 if not alias or not alias.this: 1591 self.raise_error("Expected CTE to have alias") 1592 1593 self._match(TokenType.ALIAS) 1594 1595 return self.expression( 1596 exp.CTE, 1597 this=self._parse_wrapped(self._parse_statement), 1598 alias=alias, 1599 ) 1600 1601 def _parse_table_alias( 1602 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1603 ) -> t.Optional[exp.Expression]: 1604 any_token = self._match(TokenType.ALIAS) 1605 alias = self._parse_id_var( 1606 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 1607 ) 1608 index = self._index 1609 1610 if self._match(TokenType.L_PAREN): 1611 columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var())) 1612 self._match_r_paren() if columns else self._retreat(index) 1613 else: 1614 columns = None 1615 1616 if not alias and not columns: 1617 return None 1618 1619 return self.expression(exp.TableAlias, this=alias, columns=columns) 1620 1621 def _parse_subquery( 1622 self, this: 
t.Optional[exp.Expression], parse_alias: bool = True 1623 ) -> exp.Expression: 1624 return self.expression( 1625 exp.Subquery, 1626 this=this, 1627 pivots=self._parse_pivots(), 1628 alias=self._parse_table_alias() if parse_alias else None, 1629 ) 1630 1631 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1632 if not isinstance(this, self.MODIFIABLES): 1633 return 1634 1635 table = isinstance(this, exp.Table) 1636 1637 while True: 1638 lateral = self._parse_lateral() 1639 join = self._parse_join() 1640 comma = None if table else self._match(TokenType.COMMA) 1641 if lateral: 1642 this.append("laterals", lateral) 1643 if join: 1644 this.append("joins", join) 1645 if comma: 1646 this.args["from"].append("expressions", self._parse_table()) 1647 if not (lateral or join or comma): 1648 break 1649 1650 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1651 expression = parser(self) 1652 1653 if expression: 1654 this.set(key, expression) 1655 1656 def _parse_hint(self) -> t.Optional[exp.Expression]: 1657 if self._match(TokenType.HINT): 1658 hints = self._parse_csv(self._parse_function) 1659 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1660 self.raise_error("Expected */ after HINT") 1661 return self.expression(exp.Hint, expressions=hints) 1662 1663 return None 1664 1665 def _parse_into(self) -> t.Optional[exp.Expression]: 1666 if not self._match(TokenType.INTO): 1667 return None 1668 1669 temp = self._match(TokenType.TEMPORARY) 1670 unlogged = self._match(TokenType.UNLOGGED) 1671 self._match(TokenType.TABLE) 1672 1673 return self.expression( 1674 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1675 ) 1676 1677 def _parse_from(self) -> t.Optional[exp.Expression]: 1678 if not self._match(TokenType.FROM): 1679 return None 1680 1681 return self.expression( 1682 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1683 ) 1684 1685 def _parse_match_recognize(self) -> 
t.Optional[exp.Expression]: 1686 if not self._match(TokenType.MATCH_RECOGNIZE): 1687 return None 1688 self._match_l_paren() 1689 1690 partition = self._parse_partition_by() 1691 order = self._parse_order() 1692 measures = ( 1693 self._parse_alias(self._parse_conjunction()) 1694 if self._match_text_seq("MEASURES") 1695 else None 1696 ) 1697 1698 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1699 rows = exp.Var(this="ONE ROW PER MATCH") 1700 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1701 text = "ALL ROWS PER MATCH" 1702 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1703 text += f" SHOW EMPTY MATCHES" 1704 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1705 text += f" OMIT EMPTY MATCHES" 1706 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1707 text += f" WITH UNMATCHED ROWS" 1708 rows = exp.Var(this=text) 1709 else: 1710 rows = None 1711 1712 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1713 text = "AFTER MATCH SKIP" 1714 if self._match_text_seq("PAST", "LAST", "ROW"): 1715 text += f" PAST LAST ROW" 1716 elif self._match_text_seq("TO", "NEXT", "ROW"): 1717 text += f" TO NEXT ROW" 1718 elif self._match_text_seq("TO", "FIRST"): 1719 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1720 elif self._match_text_seq("TO", "LAST"): 1721 text += f" TO LAST {self._advance_any().text}" # type: ignore 1722 after = exp.Var(this=text) 1723 else: 1724 after = None 1725 1726 if self._match_text_seq("PATTERN"): 1727 self._match_l_paren() 1728 1729 if not self._curr: 1730 self.raise_error("Expecting )", self._curr) 1731 1732 paren = 1 1733 start = self._curr 1734 1735 while self._curr and paren > 0: 1736 if self._curr.token_type == TokenType.L_PAREN: 1737 paren += 1 1738 if self._curr.token_type == TokenType.R_PAREN: 1739 paren -= 1 1740 end = self._prev 1741 self._advance() 1742 if paren > 0: 1743 self.raise_error("Expecting )", self._curr) 1744 pattern = exp.Var(this=self._find_sql(start, end)) 1745 else: 1746 
pattern = None 1747 1748 define = ( 1749 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1750 ) 1751 self._match_r_paren() 1752 1753 return self.expression( 1754 exp.MatchRecognize, 1755 partition_by=partition, 1756 order=order, 1757 measures=measures, 1758 rows=rows, 1759 after=after, 1760 pattern=pattern, 1761 define=define, 1762 ) 1763 1764 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1765 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1766 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1767 1768 if outer_apply or cross_apply: 1769 this = self._parse_select(table=True) 1770 view = None 1771 outer = not cross_apply 1772 elif self._match(TokenType.LATERAL): 1773 this = self._parse_select(table=True) 1774 view = self._match(TokenType.VIEW) 1775 outer = self._match(TokenType.OUTER) 1776 else: 1777 return None 1778 1779 if not this: 1780 this = self._parse_function() or self._parse_id_var(any_token=False) 1781 while self._match(TokenType.DOT): 1782 this = exp.Dot( 1783 this=this, 1784 expression=self._parse_function() or self._parse_id_var(any_token=False), 1785 ) 1786 1787 table_alias: t.Optional[exp.Expression] 1788 1789 if view: 1790 table = self._parse_id_var(any_token=False) 1791 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1792 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1793 else: 1794 table_alias = self._parse_table_alias() 1795 1796 expression = self.expression( 1797 exp.Lateral, 1798 this=this, 1799 view=view, 1800 outer=outer, 1801 alias=table_alias, 1802 ) 1803 1804 if outer_apply or cross_apply: 1805 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1806 1807 return expression 1808 1809 def _parse_join_side_and_kind( 1810 self, 1811 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1812 return ( 1813 self._match(TokenType.NATURAL) and 
self._prev, 1814 self._match_set(self.JOIN_SIDES) and self._prev, 1815 self._match_set(self.JOIN_KINDS) and self._prev, 1816 ) 1817 1818 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 1819 natural, side, kind = self._parse_join_side_and_kind() 1820 1821 if not skip_join_token and not self._match(TokenType.JOIN): 1822 return None 1823 1824 kwargs: t.Dict[ 1825 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 1826 ] = {"this": self._parse_table()} 1827 1828 if natural: 1829 kwargs["natural"] = True 1830 if side: 1831 kwargs["side"] = side.text 1832 if kind: 1833 kwargs["kind"] = kind.text 1834 1835 if self._match(TokenType.ON): 1836 kwargs["on"] = self._parse_conjunction() 1837 elif self._match(TokenType.USING): 1838 kwargs["using"] = self._parse_wrapped_id_vars() 1839 1840 return self.expression(exp.Join, **kwargs) # type: ignore 1841 1842 def _parse_index(self) -> exp.Expression: 1843 index = self._parse_id_var() 1844 self._match(TokenType.ON) 1845 self._match(TokenType.TABLE) # hive 1846 1847 return self.expression( 1848 exp.Index, 1849 this=index, 1850 table=self.expression(exp.Table, this=self._parse_id_var()), 1851 columns=self._parse_expression(), 1852 ) 1853 1854 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 1855 unique = self._match(TokenType.UNIQUE) 1856 primary = self._match_text_seq("PRIMARY") 1857 amp = self._match_text_seq("AMP") 1858 if not self._match(TokenType.INDEX): 1859 return None 1860 index = self._parse_id_var() 1861 columns = None 1862 if self._match(TokenType.L_PAREN, advance=False): 1863 columns = self._parse_wrapped_csv(self._parse_column) 1864 return self.expression( 1865 exp.Index, 1866 this=index, 1867 columns=columns, 1868 unique=unique, 1869 primary=primary, 1870 amp=amp, 1871 ) 1872 1873 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 1874 catalog = None 1875 db = None 1876 table = (not schema and self._parse_function()) or 
self._parse_id_var(any_token=False) 1877 1878 while self._match(TokenType.DOT): 1879 if catalog: 1880 # This allows nesting the table in arbitrarily many dot expressions if needed 1881 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 1882 else: 1883 catalog = db 1884 db = table 1885 table = self._parse_id_var() 1886 1887 if not table: 1888 self.raise_error(f"Expected table name but got {self._curr}") 1889 1890 return self.expression( 1891 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 1892 ) 1893 1894 def _parse_table( 1895 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1896 ) -> t.Optional[exp.Expression]: 1897 lateral = self._parse_lateral() 1898 1899 if lateral: 1900 return lateral 1901 1902 unnest = self._parse_unnest() 1903 1904 if unnest: 1905 return unnest 1906 1907 values = self._parse_derived_table_values() 1908 1909 if values: 1910 return values 1911 1912 subquery = self._parse_select(table=True) 1913 1914 if subquery: 1915 return subquery 1916 1917 this = self._parse_table_parts(schema=schema) 1918 1919 if schema: 1920 return self._parse_schema(this=this) 1921 1922 if self.alias_post_tablesample: 1923 table_sample = self._parse_table_sample() 1924 1925 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1926 1927 if alias: 1928 this.set("alias", alias) 1929 1930 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 1931 this.set( 1932 "hints", 1933 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 1934 ) 1935 self._match_r_paren() 1936 1937 if not self.alias_post_tablesample: 1938 table_sample = self._parse_table_sample() 1939 1940 if table_sample: 1941 table_sample.set("this", this) 1942 this = table_sample 1943 1944 return this 1945 1946 def _parse_unnest(self) -> t.Optional[exp.Expression]: 1947 if not self._match(TokenType.UNNEST): 1948 return None 1949 1950 expressions = 
self._parse_wrapped_csv(self._parse_column) 1951 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 1952 alias = self._parse_table_alias() 1953 1954 if alias and self.unnest_column_only: 1955 if alias.args.get("columns"): 1956 self.raise_error("Unexpected extra column alias in unnest.") 1957 alias.set("columns", [alias.this]) 1958 alias.set("this", None) 1959 1960 offset = None 1961 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 1962 self._match(TokenType.ALIAS) 1963 offset = self._parse_conjunction() 1964 1965 return self.expression( 1966 exp.Unnest, 1967 expressions=expressions, 1968 ordinality=ordinality, 1969 alias=alias, 1970 offset=offset, 1971 ) 1972 1973 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 1974 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 1975 if not is_derived and not self._match(TokenType.VALUES): 1976 return None 1977 1978 expressions = self._parse_csv(self._parse_value) 1979 1980 if is_derived: 1981 self._match_r_paren() 1982 1983 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 1984 1985 def _parse_table_sample(self) -> t.Optional[exp.Expression]: 1986 if not self._match(TokenType.TABLE_SAMPLE): 1987 return None 1988 1989 method = self._parse_var() 1990 bucket_numerator = None 1991 bucket_denominator = None 1992 bucket_field = None 1993 percent = None 1994 rows = None 1995 size = None 1996 seed = None 1997 1998 self._match_l_paren() 1999 2000 if self._match(TokenType.BUCKET): 2001 bucket_numerator = self._parse_number() 2002 self._match(TokenType.OUT_OF) 2003 bucket_denominator = bucket_denominator = self._parse_number() 2004 self._match(TokenType.ON) 2005 bucket_field = self._parse_field() 2006 else: 2007 num = self._parse_number() 2008 2009 if self._match(TokenType.PERCENT): 2010 percent = num 2011 elif self._match(TokenType.ROWS): 2012 rows = num 2013 else: 2014 size = num 2015 2016 self._match_r_paren() 2017 
2018 if self._match(TokenType.SEED): 2019 seed = self._parse_wrapped(self._parse_number) 2020 2021 return self.expression( 2022 exp.TableSample, 2023 method=method, 2024 bucket_numerator=bucket_numerator, 2025 bucket_denominator=bucket_denominator, 2026 bucket_field=bucket_field, 2027 percent=percent, 2028 rows=rows, 2029 size=size, 2030 seed=seed, 2031 ) 2032 2033 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2034 return list(iter(self._parse_pivot, None)) 2035 2036 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2037 index = self._index 2038 2039 if self._match(TokenType.PIVOT): 2040 unpivot = False 2041 elif self._match(TokenType.UNPIVOT): 2042 unpivot = True 2043 else: 2044 return None 2045 2046 expressions = [] 2047 field = None 2048 2049 if not self._match(TokenType.L_PAREN): 2050 self._retreat(index) 2051 return None 2052 2053 if unpivot: 2054 expressions = self._parse_csv(self._parse_column) 2055 else: 2056 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2057 2058 if not self._match(TokenType.FOR): 2059 self.raise_error("Expecting FOR") 2060 2061 value = self._parse_column() 2062 2063 if not self._match(TokenType.IN): 2064 self.raise_error("Expecting IN") 2065 2066 field = self._parse_in(value) 2067 2068 self._match_r_paren() 2069 2070 return self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2071 2072 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2073 if not skip_where_token and not self._match(TokenType.WHERE): 2074 return None 2075 2076 return self.expression( 2077 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2078 ) 2079 2080 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2081 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2082 return None 2083 2084 expressions = self._parse_csv(self._parse_conjunction) 2085 grouping_sets = 
self._parse_grouping_sets() 2086 2087 self._match(TokenType.COMMA) 2088 with_ = self._match(TokenType.WITH) 2089 cube = self._match(TokenType.CUBE) and ( 2090 with_ or self._parse_wrapped_csv(self._parse_column) 2091 ) 2092 2093 self._match(TokenType.COMMA) 2094 rollup = self._match(TokenType.ROLLUP) and ( 2095 with_ or self._parse_wrapped_csv(self._parse_column) 2096 ) 2097 2098 return self.expression( 2099 exp.Group, 2100 expressions=expressions, 2101 grouping_sets=grouping_sets, 2102 cube=cube, 2103 rollup=rollup, 2104 ) 2105 2106 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2107 if not self._match(TokenType.GROUPING_SETS): 2108 return None 2109 2110 return self._parse_wrapped_csv(self._parse_grouping_set) 2111 2112 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2113 if self._match(TokenType.L_PAREN): 2114 grouping_set = self._parse_csv(self._parse_column) 2115 self._match_r_paren() 2116 return self.expression(exp.Tuple, expressions=grouping_set) 2117 2118 return self._parse_column() 2119 2120 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2121 if not skip_having_token and not self._match(TokenType.HAVING): 2122 return None 2123 return self.expression(exp.Having, this=self._parse_conjunction()) 2124 2125 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2126 if not self._match(TokenType.QUALIFY): 2127 return None 2128 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2129 2130 def _parse_order( 2131 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2132 ) -> t.Optional[exp.Expression]: 2133 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2134 return this 2135 2136 return self.expression( 2137 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2138 ) 2139 2140 def _parse_sort( 2141 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2142 ) -> t.Optional[exp.Expression]: 2143 
if not self._match(token_type): 2144 return None 2145 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2146 2147 def _parse_ordered(self) -> exp.Expression: 2148 this = self._parse_conjunction() 2149 self._match(TokenType.ASC) 2150 is_desc = self._match(TokenType.DESC) 2151 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2152 is_nulls_last = self._match(TokenType.NULLS_LAST) 2153 desc = is_desc or False 2154 asc = not desc 2155 nulls_first = is_nulls_first or False 2156 explicitly_null_ordered = is_nulls_first or is_nulls_last 2157 if ( 2158 not explicitly_null_ordered 2159 and ( 2160 (asc and self.null_ordering == "nulls_are_small") 2161 or (desc and self.null_ordering != "nulls_are_small") 2162 ) 2163 and self.null_ordering != "nulls_are_last" 2164 ): 2165 nulls_first = True 2166 2167 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2168 2169 def _parse_limit( 2170 self, this: t.Optional[exp.Expression] = None, top: bool = False 2171 ) -> t.Optional[exp.Expression]: 2172 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2173 limit_paren = self._match(TokenType.L_PAREN) 2174 limit_exp = self.expression( 2175 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2176 ) 2177 2178 if limit_paren: 2179 self._match_r_paren() 2180 2181 return limit_exp 2182 2183 if self._match(TokenType.FETCH): 2184 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2185 direction = self._prev.text if direction else "FIRST" 2186 count = self._parse_number() 2187 self._match_set((TokenType.ROW, TokenType.ROWS)) 2188 self._match(TokenType.ONLY) 2189 return self.expression(exp.Fetch, direction=direction, count=count) 2190 2191 return this 2192 2193 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2194 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2195 return this 2196 2197 count = self._parse_number() 2198 
self._match_set((TokenType.ROW, TokenType.ROWS)) 2199 return self.expression(exp.Offset, this=this, expression=count) 2200 2201 def _parse_lock(self) -> t.Optional[exp.Expression]: 2202 if self._match_text_seq("FOR", "UPDATE"): 2203 return self.expression(exp.Lock, update=True) 2204 if self._match_text_seq("FOR", "SHARE"): 2205 return self.expression(exp.Lock, update=False) 2206 2207 return None 2208 2209 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2210 if not self._match_set(self.SET_OPERATIONS): 2211 return this 2212 2213 token_type = self._prev.token_type 2214 2215 if token_type == TokenType.UNION: 2216 expression = exp.Union 2217 elif token_type == TokenType.EXCEPT: 2218 expression = exp.Except 2219 else: 2220 expression = exp.Intersect 2221 2222 return self.expression( 2223 expression, 2224 this=this, 2225 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2226 expression=self._parse_set_operations(self._parse_select(nested=True)), 2227 ) 2228 2229 def _parse_expression(self) -> t.Optional[exp.Expression]: 2230 return self._parse_alias(self._parse_conjunction()) 2231 2232 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2233 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2234 2235 def _parse_equality(self) -> t.Optional[exp.Expression]: 2236 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2237 2238 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2239 return self._parse_tokens(self._parse_range, self.COMPARISON) 2240 2241 def _parse_range(self) -> t.Optional[exp.Expression]: 2242 this = self._parse_bitwise() 2243 negate = self._match(TokenType.NOT) 2244 2245 if self._match_set(self.RANGE_PARSERS): 2246 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2247 elif self._match(TokenType.ISNULL): 2248 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2249 2250 # Postgres supports ISNULL and NOTNULL for 
conditions. 2251 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2252 if self._match(TokenType.NOTNULL): 2253 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2254 this = self.expression(exp.Not, this=this) 2255 2256 if negate: 2257 this = self.expression(exp.Not, this=this) 2258 2259 if self._match(TokenType.IS): 2260 this = self._parse_is(this) 2261 2262 return this 2263 2264 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2265 negate = self._match(TokenType.NOT) 2266 if self._match(TokenType.DISTINCT_FROM): 2267 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2268 return self.expression(klass, this=this, expression=self._parse_expression()) 2269 2270 this = self.expression( 2271 exp.Is, 2272 this=this, 2273 expression=self._parse_null() or self._parse_boolean(), 2274 ) 2275 return self.expression(exp.Not, this=this) if negate else this 2276 2277 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2278 unnest = self._parse_unnest() 2279 if unnest: 2280 this = self.expression(exp.In, this=this, unnest=unnest) 2281 elif self._match(TokenType.L_PAREN): 2282 expressions = self._parse_csv(self._parse_select_or_expression) 2283 2284 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2285 this = self.expression(exp.In, this=this, query=expressions[0]) 2286 else: 2287 this = self.expression(exp.In, this=this, expressions=expressions) 2288 2289 self._match_r_paren() 2290 else: 2291 this = self.expression(exp.In, this=this, field=self._parse_field()) 2292 2293 return this 2294 2295 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2296 low = self._parse_bitwise() 2297 self._match(TokenType.AND) 2298 high = self._parse_bitwise() 2299 return self.expression(exp.Between, this=this, low=low, high=high) 2300 2301 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2302 if not self._match(TokenType.ESCAPE): 2303 return 
this 2304 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2305 2306 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2307 this = self._parse_term() 2308 2309 while True: 2310 if self._match_set(self.BITWISE): 2311 this = self.expression( 2312 self.BITWISE[self._prev.token_type], 2313 this=this, 2314 expression=self._parse_term(), 2315 ) 2316 elif self._match_pair(TokenType.LT, TokenType.LT): 2317 this = self.expression( 2318 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2319 ) 2320 elif self._match_pair(TokenType.GT, TokenType.GT): 2321 this = self.expression( 2322 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2323 ) 2324 else: 2325 break 2326 2327 return this 2328 2329 def _parse_term(self) -> t.Optional[exp.Expression]: 2330 return self._parse_tokens(self._parse_factor, self.TERM) 2331 2332 def _parse_factor(self) -> t.Optional[exp.Expression]: 2333 return self._parse_tokens(self._parse_unary, self.FACTOR) 2334 2335 def _parse_unary(self) -> t.Optional[exp.Expression]: 2336 if self._match_set(self.UNARY_PARSERS): 2337 return self.UNARY_PARSERS[self._prev.token_type](self) 2338 return self._parse_at_time_zone(self._parse_type()) 2339 2340 def _parse_type(self) -> t.Optional[exp.Expression]: 2341 if self._match(TokenType.INTERVAL): 2342 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2343 2344 index = self._index 2345 type_token = self._parse_types(check_func=True) 2346 this = self._parse_column() 2347 2348 if type_token: 2349 if this and not isinstance(this, exp.Star): 2350 return self.expression(exp.Cast, this=this, to=type_token) 2351 if not type_token.args.get("expressions"): 2352 self._retreat(index) 2353 return self._parse_column() 2354 return type_token 2355 2356 return this 2357 2358 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2359 index = self._index 2360 2361 if not self._match_set(self.TYPE_TOKENS): 2362 
return None 2363 2364 type_token = self._prev.token_type 2365 2366 if type_token == TokenType.PSEUDO_TYPE: 2367 return self.expression(exp.PseudoType, this=self._prev.text) 2368 2369 nested = type_token in self.NESTED_TYPE_TOKENS 2370 is_struct = type_token == TokenType.STRUCT 2371 expressions = None 2372 maybe_func = False 2373 2374 if self._match(TokenType.L_PAREN): 2375 if is_struct: 2376 expressions = self._parse_csv(self._parse_struct_kwargs) 2377 elif nested: 2378 expressions = self._parse_csv(self._parse_types) 2379 else: 2380 expressions = self._parse_csv(self._parse_conjunction) 2381 2382 if not expressions: 2383 self._retreat(index) 2384 return None 2385 2386 self._match_r_paren() 2387 maybe_func = True 2388 2389 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2390 this = exp.DataType( 2391 this=exp.DataType.Type.ARRAY, 2392 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2393 nested=True, 2394 ) 2395 2396 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2397 this = exp.DataType( 2398 this=exp.DataType.Type.ARRAY, 2399 expressions=[this], 2400 nested=True, 2401 ) 2402 2403 return this 2404 2405 if self._match(TokenType.L_BRACKET): 2406 self._retreat(index) 2407 return None 2408 2409 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2410 if nested and self._match(TokenType.LT): 2411 if is_struct: 2412 expressions = self._parse_csv(self._parse_struct_kwargs) 2413 else: 2414 expressions = self._parse_csv(self._parse_types) 2415 2416 if not self._match(TokenType.GT): 2417 self.raise_error("Expecting >") 2418 2419 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2420 values = self._parse_csv(self._parse_conjunction) 2421 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2422 2423 value: t.Optional[exp.Expression] = None 2424 if type_token in self.TIMESTAMPS: 2425 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2426 value 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2427 elif ( 2428 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2429 ): 2430 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2431 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2432 if type_token == TokenType.TIME: 2433 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2434 else: 2435 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2436 2437 maybe_func = maybe_func and value is None 2438 2439 if value is None: 2440 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2441 elif type_token == TokenType.INTERVAL: 2442 value = self.expression(exp.Interval, unit=self._parse_var()) 2443 2444 if maybe_func and check_func: 2445 index2 = self._index 2446 peek = self._parse_string() 2447 2448 if not peek: 2449 self._retreat(index) 2450 return None 2451 2452 self._retreat(index2) 2453 2454 if value: 2455 return value 2456 2457 return exp.DataType( 2458 this=exp.DataType.Type[type_token.value.upper()], 2459 expressions=expressions, 2460 nested=nested, 2461 values=values, 2462 ) 2463 2464 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2465 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2466 return self._parse_types() 2467 2468 this = self._parse_id_var() 2469 self._match(TokenType.COLON) 2470 data_type = self._parse_types() 2471 2472 if not data_type: 2473 return None 2474 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2475 2476 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2477 if not self._match(TokenType.AT_TIME_ZONE): 2478 return this 2479 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2480 2481 def _parse_column(self) -> t.Optional[exp.Expression]: 2482 this = self._parse_field() 2483 if isinstance(this, 
exp.Identifier): 2484 this = self.expression(exp.Column, this=this) 2485 elif not this: 2486 return self._parse_bracket(this) 2487 this = self._parse_bracket(this) 2488 2489 while self._match_set(self.COLUMN_OPERATORS): 2490 op_token = self._prev.token_type 2491 op = self.COLUMN_OPERATORS.get(op_token) 2492 2493 if op_token == TokenType.DCOLON: 2494 field = self._parse_types() 2495 if not field: 2496 self.raise_error("Expected type") 2497 elif op: 2498 self._advance() 2499 value = self._prev.text 2500 field = ( 2501 exp.Literal.number(value) 2502 if self._prev.token_type == TokenType.NUMBER 2503 else exp.Literal.string(value) 2504 ) 2505 else: 2506 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2507 2508 if isinstance(field, exp.Func): 2509 # bigquery allows function calls like x.y.count(...) 2510 # SAFE.SUBSTR(...) 2511 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2512 this = self._replace_columns_with_dots(this) 2513 2514 if op: 2515 this = op(self, this, field) 2516 elif isinstance(this, exp.Column) and not this.table: 2517 this = self.expression(exp.Column, this=field, table=this.this) 2518 else: 2519 this = self.expression(exp.Dot, this=this, expression=field) 2520 this = self._parse_bracket(this) 2521 2522 return this 2523 2524 def _parse_primary(self) -> t.Optional[exp.Expression]: 2525 if self._match_set(self.PRIMARY_PARSERS): 2526 token_type = self._prev.token_type 2527 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2528 2529 if token_type == TokenType.STRING: 2530 expressions = [primary] 2531 while self._match(TokenType.STRING): 2532 expressions.append(exp.Literal.string(self._prev.text)) 2533 if len(expressions) > 1: 2534 return self.expression(exp.Concat, expressions=expressions) 2535 return primary 2536 2537 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2538 return exp.Literal.number(f"0.{self._prev.text}") 2539 2540 if 
self._match(TokenType.L_PAREN): 2541 comments = self._prev_comments 2542 query = self._parse_select() 2543 2544 if query: 2545 expressions = [query] 2546 else: 2547 expressions = self._parse_csv( 2548 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2549 ) 2550 2551 this = seq_get(expressions, 0) 2552 self._parse_query_modifiers(this) 2553 self._match_r_paren() 2554 2555 if isinstance(this, exp.Subqueryable): 2556 this = self._parse_set_operations( 2557 self._parse_subquery(this=this, parse_alias=False) 2558 ) 2559 elif len(expressions) > 1: 2560 this = self.expression(exp.Tuple, expressions=expressions) 2561 else: 2562 this = self.expression(exp.Paren, this=this) 2563 2564 if this and comments: 2565 this.comments = comments 2566 2567 return this 2568 2569 return None 2570 2571 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2572 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2573 2574 def _parse_function( 2575 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2576 ) -> t.Optional[exp.Expression]: 2577 if not self._curr: 2578 return None 2579 2580 token_type = self._curr.token_type 2581 2582 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2583 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2584 2585 if not self._next or self._next.token_type != TokenType.L_PAREN: 2586 if token_type in self.NO_PAREN_FUNCTIONS: 2587 self._advance() 2588 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2589 2590 return None 2591 2592 if token_type not in self.FUNC_TOKENS: 2593 return None 2594 2595 this = self._curr.text 2596 upper = this.upper() 2597 self._advance(2) 2598 2599 parser = self.FUNCTION_PARSERS.get(upper) 2600 2601 if parser: 2602 this = parser(self) 2603 else: 2604 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2605 2606 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2607 this = 
self.expression(subquery_predicate, this=self._parse_select()) 2608 self._match_r_paren() 2609 return this 2610 2611 if functions is None: 2612 functions = self.FUNCTIONS 2613 2614 function = functions.get(upper) 2615 args = self._parse_csv(self._parse_lambda) 2616 2617 if function: 2618 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2619 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 2620 if count_params(function) == 2: 2621 params = None 2622 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2623 params = self._parse_csv(self._parse_lambda) 2624 2625 this = function(args, params) 2626 else: 2627 this = function(args) 2628 2629 self.validate_expression(this, args) 2630 else: 2631 this = self.expression(exp.Anonymous, this=this, expressions=args) 2632 2633 self._match_r_paren(this) 2634 return self._parse_window(this) 2635 2636 def _parse_user_defined_function( 2637 self, kind: t.Optional[TokenType] = None 2638 ) -> t.Optional[exp.Expression]: 2639 this = self._parse_id_var() 2640 2641 while self._match(TokenType.DOT): 2642 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2643 2644 if not self._match(TokenType.L_PAREN): 2645 return this 2646 2647 expressions = self._parse_csv(self._parse_udf_kwarg) 2648 self._match_r_paren() 2649 return self.expression( 2650 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2651 ) 2652 2653 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2654 literal = self._parse_primary() 2655 if literal: 2656 return self.expression(exp.Introducer, this=token.text, expression=literal) 2657 2658 return self.expression(exp.Identifier, this=token.text) 2659 2660 def _parse_national(self, token: Token) -> exp.Expression: 2661 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2662 2663 def _parse_session_parameter(self) -> exp.Expression: 2664 kind 
= None 2665 this = self._parse_id_var() or self._parse_primary() 2666 2667 if this and self._match(TokenType.DOT): 2668 kind = this.name 2669 this = self._parse_var() or self._parse_primary() 2670 2671 return self.expression(exp.SessionParameter, this=this, kind=kind) 2672 2673 def _parse_udf_kwarg(self) -> t.Optional[exp.Expression]: 2674 this = self._parse_id_var() 2675 kind = self._parse_types() 2676 2677 if not kind: 2678 return this 2679 2680 return self.expression(exp.UserDefinedFunctionKwarg, this=this, kind=kind) 2681 2682 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2683 index = self._index 2684 2685 if self._match(TokenType.L_PAREN): 2686 expressions = self._parse_csv(self._parse_id_var) 2687 2688 if not self._match(TokenType.R_PAREN): 2689 self._retreat(index) 2690 else: 2691 expressions = [self._parse_id_var()] 2692 2693 if self._match_set(self.LAMBDAS): 2694 return self.LAMBDAS[self._prev.token_type](self, expressions) 2695 2696 self._retreat(index) 2697 2698 this: t.Optional[exp.Expression] 2699 2700 if self._match(TokenType.DISTINCT): 2701 this = self.expression( 2702 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2703 ) 2704 else: 2705 this = self._parse_select_or_expression() 2706 2707 if self._match(TokenType.IGNORE_NULLS): 2708 this = self.expression(exp.IgnoreNulls, this=this) 2709 else: 2710 self._match(TokenType.RESPECT_NULLS) 2711 2712 return self._parse_limit(self._parse_order(this)) 2713 2714 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2715 index = self._index 2716 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2717 self._retreat(index) 2718 return this 2719 2720 args = self._parse_csv( 2721 lambda: self._parse_constraint() 2722 or self._parse_column_def(self._parse_field(any_token=True)) 2723 ) 2724 self._match_r_paren() 2725 return self.expression(exp.Schema, this=this, expressions=args) 2726 2727 def _parse_column_def(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2728 kind = self._parse_types() 2729 2730 constraints = [] 2731 while True: 2732 constraint = self._parse_column_constraint() 2733 if not constraint: 2734 break 2735 constraints.append(constraint) 2736 2737 if not kind and not constraints: 2738 return this 2739 2740 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2741 2742 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 2743 this = self._parse_references() 2744 2745 if this: 2746 return this 2747 2748 if self._match(TokenType.CONSTRAINT): 2749 this = self._parse_id_var() 2750 2751 kind: exp.Expression 2752 2753 if self._match_set((TokenType.AUTO_INCREMENT, TokenType.IDENTITY)): 2754 start = None 2755 increment = None 2756 2757 if self._match(TokenType.L_PAREN, advance=False): 2758 args = self._parse_wrapped_csv(self._parse_bitwise) 2759 start = seq_get(args, 0) 2760 increment = seq_get(args, 1) 2761 elif self._match_text_seq("START"): 2762 start = self._parse_bitwise() 2763 self._match_text_seq("INCREMENT") 2764 increment = self._parse_bitwise() 2765 2766 if start and increment: 2767 kind = exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2768 else: 2769 kind = exp.AutoIncrementColumnConstraint() 2770 elif self._match(TokenType.CHECK): 2771 constraint = self._parse_wrapped(self._parse_conjunction) 2772 kind = self.expression(exp.CheckColumnConstraint, this=constraint) 2773 elif self._match(TokenType.COLLATE): 2774 kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var()) 2775 elif self._match(TokenType.ENCODE): 2776 kind = self.expression(exp.EncodeColumnConstraint, this=self._parse_var()) 2777 elif self._match(TokenType.DEFAULT): 2778 kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_bitwise()) 2779 elif self._match_pair(TokenType.NOT, TokenType.NULL): 2780 kind = exp.NotNullColumnConstraint() 2781 elif self._match(TokenType.NULL): 2782 kind 
= exp.NotNullColumnConstraint(allow_null=True) 2783 elif self._match(TokenType.SCHEMA_COMMENT): 2784 kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string()) 2785 elif self._match(TokenType.PRIMARY_KEY): 2786 desc = None 2787 if self._match(TokenType.ASC) or self._match(TokenType.DESC): 2788 desc = self._prev.token_type == TokenType.DESC 2789 kind = exp.PrimaryKeyColumnConstraint(desc=desc) 2790 elif self._match(TokenType.UNIQUE): 2791 kind = exp.UniqueColumnConstraint() 2792 elif self._match(TokenType.GENERATED): 2793 if self._match(TokenType.BY_DEFAULT): 2794 kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2795 else: 2796 self._match(TokenType.ALWAYS) 2797 kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2798 self._match_pair(TokenType.ALIAS, TokenType.IDENTITY) 2799 2800 if self._match(TokenType.L_PAREN): 2801 if self._match_text_seq("START", "WITH"): 2802 kind.set("start", self._parse_bitwise()) 2803 if self._match_text_seq("INCREMENT", "BY"): 2804 kind.set("increment", self._parse_bitwise()) 2805 2806 self._match_r_paren() 2807 else: 2808 return this 2809 2810 return self.expression(exp.ColumnConstraint, this=this, kind=kind) 2811 2812 def _parse_constraint(self) -> t.Optional[exp.Expression]: 2813 if not self._match(TokenType.CONSTRAINT): 2814 return self._parse_unnamed_constraint() 2815 2816 this = self._parse_id_var() 2817 expressions = [] 2818 2819 while True: 2820 constraint = self._parse_unnamed_constraint() or self._parse_function() 2821 if not constraint: 2822 break 2823 expressions.append(constraint) 2824 2825 return self.expression(exp.Constraint, this=this, expressions=expressions) 2826 2827 def _parse_unnamed_constraint(self) -> t.Optional[exp.Expression]: 2828 if not self._match_set(self.CONSTRAINT_PARSERS): 2829 return None 2830 return self.CONSTRAINT_PARSERS[self._prev.token_type](self) 2831 2832 def _parse_unique(self) -> exp.Expression: 2833 return 
self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 2834 2835 def _parse_key_constraint_options(self) -> t.List[str]: 2836 options = [] 2837 while True: 2838 if not self._curr: 2839 break 2840 2841 if self._match(TokenType.ON): 2842 action = None 2843 on = self._advance_any() and self._prev.text 2844 2845 if self._match(TokenType.NO_ACTION): 2846 action = "NO ACTION" 2847 elif self._match(TokenType.CASCADE): 2848 action = "CASCADE" 2849 elif self._match_pair(TokenType.SET, TokenType.NULL): 2850 action = "SET NULL" 2851 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 2852 action = "SET DEFAULT" 2853 else: 2854 self.raise_error("Invalid key constraint") 2855 2856 options.append(f"ON {on} {action}") 2857 elif self._match_text_seq("NOT", "ENFORCED"): 2858 options.append("NOT ENFORCED") 2859 elif self._match_text_seq("DEFERRABLE"): 2860 options.append("DEFERRABLE") 2861 elif self._match_text_seq("INITIALLY", "DEFERRED"): 2862 options.append("INITIALLY DEFERRED") 2863 elif self._match_text_seq("NORELY"): 2864 options.append("NORELY") 2865 elif self._match_text_seq("MATCH", "FULL"): 2866 options.append("MATCH FULL") 2867 else: 2868 break 2869 2870 return options 2871 2872 def _parse_references(self) -> t.Optional[exp.Expression]: 2873 if not self._match(TokenType.REFERENCES): 2874 return None 2875 2876 expressions = None 2877 this = self._parse_id_var() 2878 2879 if self._match(TokenType.L_PAREN, advance=False): 2880 expressions = self._parse_wrapped_id_vars() 2881 2882 options = self._parse_key_constraint_options() 2883 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 2884 2885 def _parse_foreign_key(self) -> exp.Expression: 2886 expressions = self._parse_wrapped_id_vars() 2887 reference = self._parse_references() 2888 options = {} 2889 2890 while self._match(TokenType.ON): 2891 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 2892 self.raise_error("Expected DELETE or UPDATE") 2893 2894 
kind = self._prev.text.lower() 2895 2896 if self._match(TokenType.NO_ACTION): 2897 action = "NO ACTION" 2898 elif self._match(TokenType.SET): 2899 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 2900 action = "SET " + self._prev.text.upper() 2901 else: 2902 self._advance() 2903 action = self._prev.text.upper() 2904 2905 options[kind] = action 2906 2907 return self.expression( 2908 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 2909 ) 2910 2911 def _parse_primary_key(self) -> exp.Expression: 2912 expressions = self._parse_wrapped_id_vars() 2913 options = self._parse_key_constraint_options() 2914 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 2915 2916 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2917 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 2918 return this 2919 2920 bracket_kind = self._prev.token_type 2921 expressions: t.List[t.Optional[exp.Expression]] 2922 2923 if self._match(TokenType.COLON): 2924 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 2925 else: 2926 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 2927 2928 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 2929 if bracket_kind == TokenType.L_BRACE: 2930 this = self.expression(exp.Struct, expressions=expressions) 2931 elif not this or this.name.upper() == "ARRAY": 2932 this = self.expression(exp.Array, expressions=expressions) 2933 else: 2934 expressions = apply_index_offset(expressions, -self.index_offset) 2935 this = self.expression(exp.Bracket, this=this, expressions=expressions) 2936 2937 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 2938 self.raise_error("Expected ]") 2939 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 2940 self.raise_error("Expected }") 2941 2942 this.comments = 
self._prev_comments 2943 return self._parse_bracket(this) 2944 2945 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2946 if self._match(TokenType.COLON): 2947 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 2948 return this 2949 2950 def _parse_case(self) -> t.Optional[exp.Expression]: 2951 ifs = [] 2952 default = None 2953 2954 expression = self._parse_conjunction() 2955 2956 while self._match(TokenType.WHEN): 2957 this = self._parse_conjunction() 2958 self._match(TokenType.THEN) 2959 then = self._parse_conjunction() 2960 ifs.append(self.expression(exp.If, this=this, true=then)) 2961 2962 if self._match(TokenType.ELSE): 2963 default = self._parse_conjunction() 2964 2965 if not self._match(TokenType.END): 2966 self.raise_error("Expected END after CASE", self._prev) 2967 2968 return self._parse_window( 2969 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 2970 ) 2971 2972 def _parse_if(self) -> t.Optional[exp.Expression]: 2973 if self._match(TokenType.L_PAREN): 2974 args = self._parse_csv(self._parse_conjunction) 2975 this = exp.If.from_arg_list(args) 2976 self.validate_expression(this, args) 2977 self._match_r_paren() 2978 else: 2979 condition = self._parse_conjunction() 2980 self._match(TokenType.THEN) 2981 true = self._parse_conjunction() 2982 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 2983 self._match(TokenType.END) 2984 this = self.expression(exp.If, this=condition, true=true, false=false) 2985 2986 return self._parse_window(this) 2987 2988 def _parse_extract(self) -> exp.Expression: 2989 this = self._parse_function() or self._parse_var() or self._parse_type() 2990 2991 if self._match(TokenType.FROM): 2992 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 2993 2994 if not self._match(TokenType.COMMA): 2995 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 2996 2997 return 
self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 2998 2999 def _parse_cast(self, strict: bool) -> exp.Expression: 3000 this = self._parse_conjunction() 3001 3002 if not self._match(TokenType.ALIAS): 3003 self.raise_error("Expected AS after CAST") 3004 3005 to = self._parse_types() 3006 3007 if not to: 3008 self.raise_error("Expected TYPE after CAST") 3009 elif to.this == exp.DataType.Type.CHAR: 3010 if self._match(TokenType.CHARACTER_SET): 3011 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3012 3013 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3014 3015 def _parse_string_agg(self) -> exp.Expression: 3016 expression: t.Optional[exp.Expression] 3017 3018 if self._match(TokenType.DISTINCT): 3019 args = self._parse_csv(self._parse_conjunction) 3020 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3021 else: 3022 args = self._parse_csv(self._parse_conjunction) 3023 expression = seq_get(args, 0) 3024 3025 index = self._index 3026 if not self._match(TokenType.R_PAREN): 3027 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3028 order = self._parse_order(this=expression) 3029 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3030 3031 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3032 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3033 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3034 if not self._match(TokenType.WITHIN_GROUP): 3035 self._retreat(index) 3036 this = exp.GroupConcat.from_arg_list(args) 3037 self.validate_expression(this, args) 3038 return this 3039 3040 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3041 order = self._parse_order(this=expression) 3042 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3043 3044 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3045 to: t.Optional[exp.Expression] 3046 this = self._parse_column() 3047 3048 if self._match(TokenType.USING): 3049 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3050 elif self._match(TokenType.COMMA): 3051 to = self._parse_types() 3052 else: 3053 to = None 3054 3055 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3056 3057 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3058 args = self._parse_csv(self._parse_bitwise) 3059 3060 if self._match(TokenType.IN): 3061 return self.expression( 3062 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3063 ) 3064 3065 if haystack_first: 3066 haystack = seq_get(args, 0) 3067 needle = seq_get(args, 1) 3068 else: 3069 needle = seq_get(args, 0) 3070 haystack = seq_get(args, 1) 3071 3072 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3073 3074 self.validate_expression(this, args) 3075 3076 return this 3077 3078 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3079 args = self._parse_csv(self._parse_table) 3080 return exp.JoinHint(this=func_name.upper(), expressions=args) 3081 3082 def _parse_substring(self) -> exp.Expression: 3083 # Postgres supports the form: substring(string [from int] [for int]) 3084 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3085 3086 args = self._parse_csv(self._parse_bitwise) 3087 3088 if self._match(TokenType.FROM): 3089 
args.append(self._parse_bitwise()) 3090 if self._match(TokenType.FOR): 3091 args.append(self._parse_bitwise()) 3092 3093 this = exp.Substring.from_arg_list(args) 3094 self.validate_expression(this, args) 3095 3096 return this 3097 3098 def _parse_trim(self) -> exp.Expression: 3099 # https://www.w3resource.com/sql/character-functions/trim.php 3100 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3101 3102 position = None 3103 collation = None 3104 3105 if self._match_set(self.TRIM_TYPES): 3106 position = self._prev.text.upper() 3107 3108 expression = self._parse_term() 3109 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3110 this = self._parse_term() 3111 else: 3112 this = expression 3113 expression = None 3114 3115 if self._match(TokenType.COLLATE): 3116 collation = self._parse_term() 3117 3118 return self.expression( 3119 exp.Trim, 3120 this=this, 3121 position=position, 3122 expression=expression, 3123 collation=collation, 3124 ) 3125 3126 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3127 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3128 3129 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3130 return self._parse_window(self._parse_id_var(), alias=True) 3131 3132 def _parse_window( 3133 self, this: t.Optional[exp.Expression], alias: bool = False 3134 ) -> t.Optional[exp.Expression]: 3135 if self._match(TokenType.FILTER): 3136 where = self._parse_wrapped(self._parse_where) 3137 this = self.expression(exp.Filter, this=this, expression=where) 3138 3139 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3140 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3141 if self._match(TokenType.WITHIN_GROUP): 3142 order = self._parse_wrapped(self._parse_order) 3143 this = self.expression(exp.WithinGroup, this=this, expression=order) 3144 3145 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3146 # Some dialects choose to implement and some do not. 3147 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3148 3149 # There is some code above in _parse_lambda that handles 3150 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3151 3152 # The below changes handle 3153 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3154 3155 # Oracle allows both formats 3156 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3157 # and Snowflake chose to do the same for familiarity 3158 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3159 if self._match(TokenType.IGNORE_NULLS): 3160 this = self.expression(exp.IgnoreNulls, this=this) 3161 elif self._match(TokenType.RESPECT_NULLS): 3162 this = self.expression(exp.RespectNulls, this=this) 3163 3164 # bigquery select from window x AS (partition by ...) 
3165 if alias: 3166 self._match(TokenType.ALIAS) 3167 elif not self._match(TokenType.OVER): 3168 return this 3169 3170 if not self._match(TokenType.L_PAREN): 3171 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3172 3173 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3174 partition = self._parse_partition_by() 3175 order = self._parse_order() 3176 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3177 3178 if kind: 3179 self._match(TokenType.BETWEEN) 3180 start = self._parse_window_spec() 3181 self._match(TokenType.AND) 3182 end = self._parse_window_spec() 3183 3184 spec = self.expression( 3185 exp.WindowSpec, 3186 kind=kind, 3187 start=start["value"], 3188 start_side=start["side"], 3189 end=end["value"], 3190 end_side=end["side"], 3191 ) 3192 else: 3193 spec = None 3194 3195 self._match_r_paren() 3196 3197 return self.expression( 3198 exp.Window, 3199 this=this, 3200 partition_by=partition, 3201 order=order, 3202 spec=spec, 3203 alias=window_alias, 3204 ) 3205 3206 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3207 self._match(TokenType.BETWEEN) 3208 3209 return { 3210 "value": ( 3211 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3212 ) 3213 or self._parse_bitwise(), 3214 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3215 } 3216 3217 def _parse_alias( 3218 self, this: t.Optional[exp.Expression], explicit: bool = False 3219 ) -> t.Optional[exp.Expression]: 3220 any_token = self._match(TokenType.ALIAS) 3221 3222 if explicit and not any_token: 3223 return this 3224 3225 if self._match(TokenType.L_PAREN): 3226 aliases = self.expression( 3227 exp.Aliases, 3228 this=this, 3229 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3230 ) 3231 self._match_r_paren(aliases) 3232 return aliases 3233 3234 alias = self._parse_id_var(any_token) 3235 3236 if 
        # NOTE(review): tail of a method whose `def` line is above this chunk;
        # the leading `if` keyword is reconstructed from the visible `alias:` —
        # confirm against the full file.
        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier or, failing that, an identifier-like name token."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            # Concatenate the text of any matched prefix tokens onto the name.
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved token when `any_token` is True)."""
        if (any_token and self._advance_any()) or self._match(TokenType.VAR):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, or a string literal if no variable is present."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal; returns None when the token is absent."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal; returns None when neither is present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token; returns None when the token is absent."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewinds if the registered parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser produced nothing: back up so the token can be re-read.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT column list, parenthesized or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE expression list, parenthesized or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators drawn from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Parse `( <parse_method> )`, raising via raise_error on a missing paren."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a plain expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT (possibly with set operations) embedded in a DDL statement."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse a transaction start (e.g. BEGIN ... TRANSACTION|WORK) with optional modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens, joined with spaces below.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column definition>."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse a DROP action defaulting to COLUMN kind."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse a comma-separated list of partitions being dropped."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / CHECK / FOREIGN KEY / PRIMARY KEY action."""
        this = None
        # The constraint-kind token was consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match(TokenType.CHECK):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; any other ALTER target becomes a raw Command."""
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(self._prev)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None

        index = self._index
        if self._match(TokenType.DELETE):
            actions = [self.expression(exp.Delete, where=self._parse_where())]
        elif self._match_text_seq("ADD"):
            if self._match_set(self.ADD_CONSTRAINT_TOKENS):
                actions = self._parse_csv(self._parse_add_constraint)
            else:
                # Not a constraint: rewind and parse ADD COLUMN actions instead.
                self._retreat(index)
                actions = self._parse_csv(self._parse_add_column)
        elif self._match_text_seq("DROP"):
            partition_exists = self._parse_exists()

            if self._match(TokenType.PARTITION, advance=False):
                actions = self._parse_csv(
                    lambda: self._parse_drop_partition(exists=partition_exists)
                )
            else:
                # Not a partition drop: rewind and parse DROP COLUMN actions.
                self._retreat(index)
                actions = self._parse_csv(self._parse_drop_column)
        elif self._match_text_seq("RENAME", "TO"):
            actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))
        elif self._match_text_seq("ALTER"):
            self._match(TokenType.COLUMN)
            column = self._parse_field(any_token=True)

            if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
                actions = self.expression(exp.AlterColumn, this=column, drop=True)
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                actions = self.expression(
                    exp.AlterColumn, this=column, default=self._parse_conjunction()
                )
            else:
                self._match_text_seq("SET", "DATA")
                actions = self.expression(
                    exp.AlterColumn,
                    this=column,
                    dtype=self._match_text_seq("TYPE") and self._parse_types(),
                    collate=self._match(TokenType.COLLATE) and self._parse_term(),
                    using=self._match(TokenType.USING) and self._parse_conjunction(),
                )

        actions = ensure_list(actions)
        return self.expression(exp.AlterTable, this=this, exists=exists, actions=actions)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via a registered sub-parser, else as a generic Show node."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        """Fallback SET-item parser: wrap the remaining statement in a SetItem."""
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via a registered sub-parser, else the default."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO ... USING ... ON ... with its WHEN ... THEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)

            # NOTE(review): if none of INSERT/UPDATE/DELETE matched, `then` is
            # unbound here and a malformed WHEN clause raises NameError — confirm
            # whether upstream guards this.
            whens.append(self.expression(exp.When, this=this, then=then))

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement as a list of SetItem expressions."""
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as an exp.Command."""
        while self._curr:
            self._advance()
        return exp.Command(this=self._find_sql(start, self._prev))

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the keyword trie to locate a registered (possibly multi-word) parser."""
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        # No match: restore the token position before returning.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True):
        """Return True (and optionally consume) if the current token matches `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types):
        """Return True and consume if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True (and optionally consume both) if the next two tokens match."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        """Require a `(`; attach any pending comments to `expression`."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_r_paren(self, expression=None):
        """Require a `)`; attach any pending comments to `expression`."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_texts(self, texts):
        """Consume the current token if its upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; rewinds fully on mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        # Peek-only mode: report the match but leave the position untouched.
        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var expressions."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Replace Column nodes named in `lambda_variables` with their inner identifier."""
        if isinstance(node, exp.Column):
            if node.name in lambda_variables:
                return node.this
        return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE (the fallback applied by __init__ when no level is given).
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
667 def __init__( 668 self, 669 error_level: t.Optional[ErrorLevel] = None, 670 error_message_context: int = 100, 671 index_offset: int = 0, 672 unnest_column_only: bool = False, 673 alias_post_tablesample: bool = False, 674 max_errors: int = 3, 675 null_ordering: t.Optional[str] = None, 676 ): 677 self.error_level = error_level or ErrorLevel.IMMEDIATE 678 self.error_message_context = error_message_context 679 self.index_offset = index_offset 680 self.unnest_column_only = unnest_column_only 681 self.alias_post_tablesample = alias_post_tablesample 682 self.max_errors = max_errors 683 self.null_ordering = null_ordering 684 self.reset()
696 def parse( 697 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 698 ) -> t.List[t.Optional[exp.Expression]]: 699 """ 700 Parses a list of tokens and returns a list of syntax trees, one tree 701 per parsed SQL statement. 702 703 Args: 704 raw_tokens: the list of tokens. 705 sql: the original SQL string, used to produce helpful debug messages. 706 707 Returns: 708 The list of syntax trees. 709 """ 710 return self._parse( 711 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 712 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
714 def parse_into( 715 self, 716 expression_types: exp.IntoType, 717 raw_tokens: t.List[Token], 718 sql: t.Optional[str] = None, 719 ) -> t.List[t.Optional[exp.Expression]]: 720 """ 721 Parses a list of tokens into a given Expression type. If a collection of Expression 722 types is given instead, this method will try to parse the token list into each one 723 of them, stopping at the first for which the parsing succeeds. 724 725 Args: 726 expression_types: the expression type(s) to try and parse the token list into. 727 raw_tokens: the list of tokens. 728 sql: the original SQL string, used to produce helpful debug messages. 729 730 Returns: 731 The target Expression. 732 """ 733 errors = [] 734 for expression_type in ensure_collection(expression_types): 735 parser = self.EXPRESSION_PARSERS.get(expression_type) 736 if not parser: 737 raise TypeError(f"No parser registered for {expression_type}") 738 try: 739 return self._parse(parser, raw_tokens, sql) 740 except ParseError as e: 741 e.errors[0]["into_expression"] = expression_type 742 errors.append(e) 743 raise ParseError( 744 f"Failed to parse into {expression_types}", 745 errors=merge_errors(errors), 746 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
782 def check_errors(self) -> None: 783 """ 784 Logs or raises any found errors, depending on the chosen error level setting. 785 """ 786 if self.error_level == ErrorLevel.WARN: 787 for error in self.errors: 788 logger.error(str(error)) 789 elif self.error_level == ErrorLevel.RAISE and self.errors: 790 raise ParseError( 791 concat_messages(self.errors, self.max_errors), 792 errors=merge_errors(self.errors), 793 )
Logs or raises any found errors, depending on the chosen error level setting.
795 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 796 """ 797 Appends an error in the list of recorded errors or raises it, depending on the chosen 798 error level setting. 799 """ 800 token = token or self._curr or self._prev or Token.string("") 801 start = self._find_token(token) 802 end = start + len(token.text) 803 start_context = self.sql[max(start - self.error_message_context, 0) : start] 804 highlight = self.sql[start:end] 805 end_context = self.sql[end : end + self.error_message_context] 806 807 error = ParseError.new( 808 f"{message}. Line {token.line}, Col: {token.col}.\n" 809 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 810 description=message, 811 line=token.line, 812 col=token.col, 813 start_context=start_context, 814 highlight=highlight, 815 end_context=end_context, 816 ) 817 818 if self.error_level == ErrorLevel.IMMEDIATE: 819 raise error 820 821 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
823 def expression( 824 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 825 ) -> exp.Expression: 826 """ 827 Creates a new, validated Expression. 828 829 Args: 830 exp_class: the expression class to instantiate. 831 comments: an optional list of comments to attach to the expression. 832 kwargs: the arguments to set for the expression along with their respective values. 833 834 Returns: 835 The target expression. 836 """ 837 instance = exp_class(**kwargs) 838 if self._prev_comments: 839 instance.comments = self._prev_comments 840 self._prev_comments = None 841 if comments: 842 instance.comments = comments 843 self.validate_expression(instance) 844 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
846 def validate_expression( 847 self, expression: exp.Expression, args: t.Optional[t.List] = None 848 ) -> None: 849 """ 850 Validates an already instantiated expression, making sure that all its mandatory arguments 851 are set. 852 853 Args: 854 expression: the expression to validate. 855 args: an optional list of items that was used to instantiate the expression, if it's a Func. 856 """ 857 if self.error_level == ErrorLevel.IGNORE: 858 return 859 860 for error_message in expression.error_messages(args): 861 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.