sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21E = t.TypeVar("E", bound=exp.Expression) 22 23 24def parse_var_map(args: t.Sequence) -> exp.Expression: 25 if len(args) == 1 and args[0].is_star: 26 return exp.StarMap(this=args[0]) 27 28 keys = [] 29 values = [] 30 for i in range(0, len(args), 2): 31 keys.append(args[i]) 32 values.append(args[i + 1]) 33 return exp.VarMap( 34 keys=exp.Array(expressions=keys), 35 values=exp.Array(expressions=values), 36 ) 37 38 39def parse_like(args): 40 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 41 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 42 43 44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 return lambda self, this: self._parse_escape( 48 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 49 ) 50 51 52class _Parser(type): 53 def __new__(cls, clsname, bases, attrs): 54 klass = super().__new__(cls, clsname, bases, attrs) 55 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 56 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 57 58 return klass 59 60 61class Parser(metaclass=_Parser): 62 """ 63 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 64 a parsed syntax tree. 65 66 Args: 67 error_level: the desired error level. 
68 Default: ErrorLevel.RAISE 69 error_message_context: determines the amount of context to capture from a 70 query string when displaying the error message (in number of characters). 71 Default: 50. 72 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 73 Default: 0 74 alias_post_tablesample: If the table alias comes after tablesample. 75 Default: False 76 max_errors: Maximum number of error messages to include in a raised ParseError. 77 This is only relevant if error_level is ErrorLevel.RAISE. 78 Default: 3 79 null_ordering: Indicates the default null ordering method to use if not explicitly set. 80 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 81 Default: "nulls_are_small" 82 """ 83 84 FUNCTIONS: t.Dict[str, t.Callable] = { 85 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 86 "DATE_TO_DATE_STR": lambda args: exp.Cast( 87 this=seq_get(args, 0), 88 to=exp.DataType(this=exp.DataType.Type.TEXT), 89 ), 90 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 91 "IFNULL": exp.Coalesce.from_arg_list, 92 "LIKE": parse_like, 93 "TIME_TO_TIME_STR": lambda args: exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 98 this=exp.Cast( 99 this=seq_get(args, 0), 100 to=exp.DataType(this=exp.DataType.Type.TEXT), 101 ), 102 start=exp.Literal.number(1), 103 length=exp.Literal.number(10), 104 ), 105 "VAR_MAP": parse_var_map, 106 } 107 108 NO_PAREN_FUNCTIONS = { 109 TokenType.CURRENT_DATE: exp.CurrentDate, 110 TokenType.CURRENT_DATETIME: exp.CurrentDate, 111 TokenType.CURRENT_TIME: exp.CurrentTime, 112 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 113 TokenType.CURRENT_USER: exp.CurrentUser, 114 } 115 116 JOIN_HINTS: t.Set[str] = set() 117 118 NESTED_TYPE_TOKENS = { 119 TokenType.ARRAY, 120 TokenType.MAP, 121 TokenType.STRUCT, 122 TokenType.NULLABLE, 123 } 124 125 
TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.SMALLINT, 130 TokenType.INT, 131 TokenType.BIGINT, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATE, 154 TokenType.DECIMAL, 155 TokenType.BIGDECIMAL, 156 TokenType.UUID, 157 TokenType.GEOGRAPHY, 158 TokenType.GEOMETRY, 159 TokenType.HLLSKETCH, 160 TokenType.HSTORE, 161 TokenType.PSEUDO_TYPE, 162 TokenType.SUPER, 163 TokenType.SERIAL, 164 TokenType.SMALLSERIAL, 165 TokenType.BIGSERIAL, 166 TokenType.XML, 167 TokenType.UNIQUEIDENTIFIER, 168 TokenType.MONEY, 169 TokenType.SMALLMONEY, 170 TokenType.ROWVERSION, 171 TokenType.IMAGE, 172 TokenType.VARIANT, 173 TokenType.OBJECT, 174 TokenType.INET, 175 *NESTED_TYPE_TOKENS, 176 } 177 178 SUBQUERY_PREDICATES = { 179 TokenType.ANY: exp.Any, 180 TokenType.ALL: exp.All, 181 TokenType.EXISTS: exp.Exists, 182 TokenType.SOME: exp.Any, 183 } 184 185 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 186 187 DB_CREATABLES = { 188 TokenType.DATABASE, 189 TokenType.SCHEMA, 190 TokenType.TABLE, 191 TokenType.VIEW, 192 } 193 194 CREATABLES = { 195 TokenType.COLUMN, 196 TokenType.FUNCTION, 197 TokenType.INDEX, 198 TokenType.PROCEDURE, 199 *DB_CREATABLES, 200 } 201 202 ID_VAR_TOKENS = { 203 TokenType.VAR, 204 TokenType.ANTI, 205 TokenType.APPLY, 206 TokenType.AUTO_INCREMENT, 207 TokenType.BEGIN, 208 TokenType.BOTH, 209 TokenType.BUCKET, 210 TokenType.CACHE, 211 TokenType.CASCADE, 212 TokenType.COLLATE, 213 TokenType.COMMAND, 214 TokenType.COMMENT, 215 TokenType.COMMIT, 216 
TokenType.COMPOUND, 217 TokenType.CONSTRAINT, 218 TokenType.DEFAULT, 219 TokenType.DELETE, 220 TokenType.DESCRIBE, 221 TokenType.DIV, 222 TokenType.END, 223 TokenType.EXECUTE, 224 TokenType.ESCAPE, 225 TokenType.FALSE, 226 TokenType.FIRST, 227 TokenType.FILTER, 228 TokenType.FOLLOWING, 229 TokenType.FORMAT, 230 TokenType.FULL, 231 TokenType.IF, 232 TokenType.IS, 233 TokenType.ISNULL, 234 TokenType.INTERVAL, 235 TokenType.KEEP, 236 TokenType.LAZY, 237 TokenType.LEADING, 238 TokenType.LEFT, 239 TokenType.LOCAL, 240 TokenType.MATERIALIZED, 241 TokenType.MERGE, 242 TokenType.NATURAL, 243 TokenType.NEXT, 244 TokenType.OFFSET, 245 TokenType.ONLY, 246 TokenType.OPTIONS, 247 TokenType.ORDINALITY, 248 TokenType.OVERWRITE, 249 TokenType.PARTITION, 250 TokenType.PERCENT, 251 TokenType.PIVOT, 252 TokenType.PRAGMA, 253 TokenType.PRECEDING, 254 TokenType.RANGE, 255 TokenType.REFERENCES, 256 TokenType.RIGHT, 257 TokenType.ROW, 258 TokenType.ROWS, 259 TokenType.SEED, 260 TokenType.SEMI, 261 TokenType.SET, 262 TokenType.SHOW, 263 TokenType.SORTKEY, 264 TokenType.TEMPORARY, 265 TokenType.TOP, 266 TokenType.TRAILING, 267 TokenType.TRUE, 268 TokenType.UNBOUNDED, 269 TokenType.UNIQUE, 270 TokenType.UNLOGGED, 271 TokenType.UNPIVOT, 272 TokenType.VOLATILE, 273 TokenType.WINDOW, 274 *CREATABLES, 275 *SUBQUERY_PREDICATES, 276 *TYPE_TOKENS, 277 *NO_PAREN_FUNCTIONS, 278 } 279 280 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 281 282 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 283 TokenType.APPLY, 284 TokenType.FULL, 285 TokenType.LEFT, 286 TokenType.NATURAL, 287 TokenType.OFFSET, 288 TokenType.RIGHT, 289 TokenType.WINDOW, 290 } 291 292 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 293 294 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 295 296 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 297 298 FUNC_TOKENS = { 299 TokenType.COMMAND, 300 TokenType.CURRENT_DATE, 301 TokenType.CURRENT_DATETIME, 302 TokenType.CURRENT_TIMESTAMP, 303 
TokenType.CURRENT_TIME, 304 TokenType.CURRENT_USER, 305 TokenType.FILTER, 306 TokenType.FIRST, 307 TokenType.FORMAT, 308 TokenType.GLOB, 309 TokenType.IDENTIFIER, 310 TokenType.INDEX, 311 TokenType.ISNULL, 312 TokenType.ILIKE, 313 TokenType.LIKE, 314 TokenType.MERGE, 315 TokenType.OFFSET, 316 TokenType.PRIMARY_KEY, 317 TokenType.REPLACE, 318 TokenType.ROW, 319 TokenType.UNNEST, 320 TokenType.VAR, 321 TokenType.LEFT, 322 TokenType.RIGHT, 323 TokenType.DATE, 324 TokenType.DATETIME, 325 TokenType.TABLE, 326 TokenType.TIMESTAMP, 327 TokenType.TIMESTAMPTZ, 328 TokenType.WINDOW, 329 *TYPE_TOKENS, 330 *SUBQUERY_PREDICATES, 331 } 332 333 CONJUNCTION = { 334 TokenType.AND: exp.And, 335 TokenType.OR: exp.Or, 336 } 337 338 EQUALITY = { 339 TokenType.EQ: exp.EQ, 340 TokenType.NEQ: exp.NEQ, 341 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 342 } 343 344 COMPARISON = { 345 TokenType.GT: exp.GT, 346 TokenType.GTE: exp.GTE, 347 TokenType.LT: exp.LT, 348 TokenType.LTE: exp.LTE, 349 } 350 351 BITWISE = { 352 TokenType.AMP: exp.BitwiseAnd, 353 TokenType.CARET: exp.BitwiseXor, 354 TokenType.PIPE: exp.BitwiseOr, 355 TokenType.DPIPE: exp.DPipe, 356 } 357 358 TERM = { 359 TokenType.DASH: exp.Sub, 360 TokenType.PLUS: exp.Add, 361 TokenType.MOD: exp.Mod, 362 TokenType.COLLATE: exp.Collate, 363 } 364 365 FACTOR = { 366 TokenType.DIV: exp.IntDiv, 367 TokenType.LR_ARROW: exp.Distance, 368 TokenType.SLASH: exp.Div, 369 TokenType.STAR: exp.Mul, 370 } 371 372 TIMESTAMPS = { 373 TokenType.TIME, 374 TokenType.TIMESTAMP, 375 TokenType.TIMESTAMPTZ, 376 TokenType.TIMESTAMPLTZ, 377 } 378 379 SET_OPERATIONS = { 380 TokenType.UNION, 381 TokenType.INTERSECT, 382 TokenType.EXCEPT, 383 } 384 385 JOIN_SIDES = { 386 TokenType.LEFT, 387 TokenType.RIGHT, 388 TokenType.FULL, 389 } 390 391 JOIN_KINDS = { 392 TokenType.INNER, 393 TokenType.OUTER, 394 TokenType.CROSS, 395 TokenType.SEMI, 396 TokenType.ANTI, 397 } 398 399 LAMBDAS = { 400 TokenType.ARROW: lambda self, expressions: self.expression( 401 exp.Lambda, 402 
this=self._replace_lambda( 403 self._parse_conjunction(), 404 {node.name for node in expressions}, 405 ), 406 expressions=expressions, 407 ), 408 TokenType.FARROW: lambda self, expressions: self.expression( 409 exp.Kwarg, 410 this=exp.Var(this=expressions[0].name), 411 expression=self._parse_conjunction(), 412 ), 413 } 414 415 COLUMN_OPERATORS = { 416 TokenType.DOT: None, 417 TokenType.DCOLON: lambda self, this, to: self.expression( 418 exp.Cast if self.STRICT_CAST else exp.TryCast, 419 this=this, 420 to=to, 421 ), 422 TokenType.ARROW: lambda self, this, path: self.expression( 423 exp.JSONExtract, 424 this=this, 425 expression=path, 426 ), 427 TokenType.DARROW: lambda self, this, path: self.expression( 428 exp.JSONExtractScalar, 429 this=this, 430 expression=path, 431 ), 432 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 433 exp.JSONBExtract, 434 this=this, 435 expression=path, 436 ), 437 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 438 exp.JSONBExtractScalar, 439 this=this, 440 expression=path, 441 ), 442 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 443 exp.JSONBContains, 444 this=this, 445 expression=key, 446 ), 447 } 448 449 EXPRESSION_PARSERS = { 450 exp.Column: lambda self: self._parse_column(), 451 exp.DataType: lambda self: self._parse_types(), 452 exp.From: lambda self: self._parse_from(), 453 exp.Group: lambda self: self._parse_group(), 454 exp.Identifier: lambda self: self._parse_id_var(), 455 exp.Lateral: lambda self: self._parse_lateral(), 456 exp.Join: lambda self: self._parse_join(), 457 exp.Order: lambda self: self._parse_order(), 458 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 459 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 460 exp.Lambda: lambda self: self._parse_lambda(), 461 exp.Limit: lambda self: self._parse_limit(), 462 exp.Offset: lambda self: self._parse_offset(), 463 exp.TableAlias: lambda self: self._parse_table_alias(), 464 
exp.Table: lambda self: self._parse_table(), 465 exp.Condition: lambda self: self._parse_conjunction(), 466 exp.Expression: lambda self: self._parse_statement(), 467 exp.Properties: lambda self: self._parse_properties(), 468 exp.Where: lambda self: self._parse_where(), 469 exp.Ordered: lambda self: self._parse_ordered(), 470 exp.Having: lambda self: self._parse_having(), 471 exp.With: lambda self: self._parse_with(), 472 exp.Window: lambda self: self._parse_named_window(), 473 exp.Qualify: lambda self: self._parse_qualify(), 474 exp.Returning: lambda self: self._parse_returning(), 475 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 476 } 477 478 STATEMENT_PARSERS = { 479 TokenType.ALTER: lambda self: self._parse_alter(), 480 TokenType.BEGIN: lambda self: self._parse_transaction(), 481 TokenType.CACHE: lambda self: self._parse_cache(), 482 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 483 TokenType.COMMENT: lambda self: self._parse_comment(), 484 TokenType.CREATE: lambda self: self._parse_create(), 485 TokenType.DELETE: lambda self: self._parse_delete(), 486 TokenType.DESC: lambda self: self._parse_describe(), 487 TokenType.DESCRIBE: lambda self: self._parse_describe(), 488 TokenType.DROP: lambda self: self._parse_drop(), 489 TokenType.END: lambda self: self._parse_commit_or_rollback(), 490 TokenType.INSERT: lambda self: self._parse_insert(), 491 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 492 TokenType.MERGE: lambda self: self._parse_merge(), 493 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 494 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 495 TokenType.SET: lambda self: self._parse_set(), 496 TokenType.UNCACHE: lambda self: self._parse_uncache(), 497 TokenType.UPDATE: lambda self: self._parse_update(), 498 TokenType.USE: lambda self: self.expression( 499 exp.Use, 500 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 501 and 
exp.Var(this=self._prev.text), 502 this=self._parse_table(schema=False), 503 ), 504 } 505 506 UNARY_PARSERS = { 507 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 508 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 509 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 510 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 511 } 512 513 PRIMARY_PARSERS = { 514 TokenType.STRING: lambda self, token: self.expression( 515 exp.Literal, this=token.text, is_string=True 516 ), 517 TokenType.NUMBER: lambda self, token: self.expression( 518 exp.Literal, this=token.text, is_string=False 519 ), 520 TokenType.STAR: lambda self, _: self.expression( 521 exp.Star, 522 **{"except": self._parse_except(), "replace": self._parse_replace()}, 523 ), 524 TokenType.NULL: lambda self, _: self.expression(exp.Null), 525 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 526 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 527 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 528 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 529 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 530 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 531 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 532 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 533 } 534 535 PLACEHOLDER_PARSERS = { 536 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 537 TokenType.PARAMETER: lambda self: self._parse_parameter(), 538 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 539 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 540 else None, 541 } 542 543 RANGE_PARSERS = { 544 
TokenType.BETWEEN: lambda self, this: self._parse_between(this), 545 TokenType.GLOB: binary_range_parser(exp.Glob), 546 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 547 TokenType.IN: lambda self, this: self._parse_in(this), 548 TokenType.IS: lambda self, this: self._parse_is(this), 549 TokenType.LIKE: binary_range_parser(exp.Like), 550 TokenType.ILIKE: binary_range_parser(exp.ILike), 551 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 552 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 553 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 554 } 555 556 PROPERTY_PARSERS = { 557 "AFTER": lambda self: self._parse_afterjournal( 558 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 559 ), 560 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 561 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 562 "BEFORE": lambda self: self._parse_journal( 563 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 564 ), 565 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 566 "CHARACTER SET": lambda self: self._parse_character_set(), 567 "CHECKSUM": lambda self: self._parse_checksum(), 568 "CLUSTER BY": lambda self: self.expression( 569 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 570 ), 571 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 572 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 573 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 574 default=self._prev.text.upper() == "DEFAULT" 575 ), 576 "DEFINER": lambda self: self._parse_definer(), 577 "DETERMINISTIC": lambda self: self.expression( 578 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 579 ), 580 "DISTKEY": lambda self: self._parse_distkey(), 581 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 582 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 583 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 584 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 585 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 586 "FREESPACE": lambda self: self._parse_freespace(), 587 "GLOBAL": lambda self: self._parse_temporary(global_=True), 588 "IMMUTABLE": lambda self: self.expression( 589 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 590 ), 591 "JOURNAL": lambda self: self._parse_journal( 592 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 593 ), 594 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 595 "LIKE": lambda self: self._parse_create_like(), 596 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 597 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 598 "LOCK": lambda self: self._parse_locking(), 599 "LOCKING": lambda self: self._parse_locking(), 600 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 601 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 602 "MAX": lambda self: self._parse_datablocksize(), 603 "MAXIMUM": lambda self: self._parse_datablocksize(), 604 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 605 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 606 ), 607 "MIN": lambda self: self._parse_datablocksize(), 608 "MINIMUM": lambda self: self._parse_datablocksize(), 609 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 610 "NO": lambda self: self._parse_noprimaryindex(), 611 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 612 "ON": lambda self: self._parse_oncommit(), 613 "PARTITION BY": lambda self: self._parse_partitioned_by(), 614 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 615 
"PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 616 "RETURNS": lambda self: self._parse_returns(), 617 "ROW": lambda self: self._parse_row(), 618 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 619 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 620 "SORTKEY": lambda self: self._parse_sortkey(), 621 "STABLE": lambda self: self.expression( 622 exp.StabilityProperty, this=exp.Literal.string("STABLE") 623 ), 624 "STORED": lambda self: self._parse_stored(), 625 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 626 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 627 "TEMP": lambda self: self._parse_temporary(global_=False), 628 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 629 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 630 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 631 "VOLATILE": lambda self: self._parse_volatile_property(), 632 "WITH": lambda self: self._parse_with_property(), 633 } 634 635 CONSTRAINT_PARSERS = { 636 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 637 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 638 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 639 "CHARACTER SET": lambda self: self.expression( 640 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 641 ), 642 "CHECK": lambda self: self.expression( 643 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 644 ), 645 "COLLATE": lambda self: self.expression( 646 exp.CollateColumnConstraint, this=self._parse_var() 647 ), 648 "COMMENT": lambda self: self.expression( 649 exp.CommentColumnConstraint, this=self._parse_string() 650 ), 651 "COMPRESS": lambda self: self._parse_compress(), 652 "DEFAULT": lambda self: self.expression( 653 exp.DefaultColumnConstraint, this=self._parse_bitwise() 
654 ), 655 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 656 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 657 "FORMAT": lambda self: self.expression( 658 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "GENERATED": lambda self: self._parse_generated_as_identity(), 661 "IDENTITY": lambda self: self._parse_auto_increment(), 662 "INLINE": lambda self: self._parse_inline(), 663 "LIKE": lambda self: self._parse_create_like(), 664 "NOT": lambda self: self._parse_not_constraint(), 665 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 666 "ON": lambda self: self._match(TokenType.UPDATE) 667 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 668 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 669 "PRIMARY KEY": lambda self: self._parse_primary_key(), 670 "REFERENCES": lambda self: self._parse_references(match=False), 671 "TITLE": lambda self: self.expression( 672 exp.TitleColumnConstraint, this=self._parse_var_or_string() 673 ), 674 "UNIQUE": lambda self: self._parse_unique(), 675 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 676 } 677 678 ALTER_PARSERS = { 679 "ADD": lambda self: self._parse_alter_table_add(), 680 "ALTER": lambda self: self._parse_alter_table_alter(), 681 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 682 "DROP": lambda self: self._parse_alter_table_drop(), 683 "RENAME": lambda self: self._parse_alter_table_rename(), 684 } 685 686 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 687 688 NO_PAREN_FUNCTION_PARSERS = { 689 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 690 TokenType.CASE: lambda self: self._parse_case(), 691 TokenType.IF: lambda self: self._parse_if(), 692 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 693 
exp.NextValueFor, 694 this=self._parse_column(), 695 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 696 ), 697 } 698 699 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 700 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 701 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 702 "DECODE": lambda self: self._parse_decode(), 703 "EXTRACT": lambda self: self._parse_extract(), 704 "JSON_OBJECT": lambda self: self._parse_json_object(), 705 "LOG": lambda self: self._parse_logarithm(), 706 "MATCH": lambda self: self._parse_match_against(), 707 "POSITION": lambda self: self._parse_position(), 708 "STRING_AGG": lambda self: self._parse_string_agg(), 709 "SUBSTRING": lambda self: self._parse_substring(), 710 "TRIM": lambda self: self._parse_trim(), 711 "TRY_CAST": lambda self: self._parse_cast(False), 712 "TRY_CONVERT": lambda self: self._parse_convert(False), 713 } 714 715 QUERY_MODIFIER_PARSERS = { 716 "match": lambda self: self._parse_match_recognize(), 717 "where": lambda self: self._parse_where(), 718 "group": lambda self: self._parse_group(), 719 "having": lambda self: self._parse_having(), 720 "qualify": lambda self: self._parse_qualify(), 721 "windows": lambda self: self._parse_window_clause(), 722 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 723 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 724 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 725 "order": lambda self: self._parse_order(), 726 "limit": lambda self: self._parse_limit(), 727 "offset": lambda self: self._parse_offset(), 728 "lock": lambda self: self._parse_lock(), 729 "sample": lambda self: self._parse_table_sample(as_modifier=True), 730 } 731 732 SET_PARSERS = { 733 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 734 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 735 "SESSION": lambda self: 
self._parse_set_item_assignment("SESSION"), 736 "TRANSACTION": lambda self: self._parse_set_transaction(), 737 } 738 739 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 740 741 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 742 743 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 744 745 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 746 747 TRANSACTION_CHARACTERISTICS = { 748 "ISOLATION LEVEL REPEATABLE READ", 749 "ISOLATION LEVEL READ COMMITTED", 750 "ISOLATION LEVEL READ UNCOMMITTED", 751 "ISOLATION LEVEL SERIALIZABLE", 752 "READ WRITE", 753 "READ ONLY", 754 } 755 756 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 757 758 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 759 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 760 761 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 762 763 STRICT_CAST = True 764 765 CONVERT_TYPE_FIRST = False 766 767 QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None 768 PREFIXED_PIVOT_COLUMNS = False 769 770 LOG_BASE_FIRST = True 771 LOG_DEFAULTS_TO_LN = False 772 773 __slots__ = ( 774 "error_level", 775 "error_message_context", 776 "sql", 777 "errors", 778 "index_offset", 779 "unnest_column_only", 780 "alias_post_tablesample", 781 "max_errors", 782 "null_ordering", 783 "_tokens", 784 "_index", 785 "_curr", 786 "_next", 787 "_prev", 788 "_prev_comments", 789 "_show_trie", 790 "_set_trie", 791 ) 792 793 def __init__( 794 self, 795 error_level: t.Optional[ErrorLevel] = None, 796 error_message_context: int = 100, 797 index_offset: int = 0, 798 unnest_column_only: bool = False, 799 alias_post_tablesample: bool = False, 800 max_errors: int = 3, 801 null_ordering: t.Optional[str] = None, 802 ): 803 self.error_level = error_level or ErrorLevel.IMMEDIATE 804 self.error_message_context = error_message_context 805 self.index_offset = index_offset 806 self.unnest_column_only = unnest_column_only 807 self.alias_post_tablesample = 
alias_post_tablesample 808 self.max_errors = max_errors 809 self.null_ordering = null_ordering 810 self.reset() 811 812 def reset(self): 813 self.sql = "" 814 self.errors = [] 815 self._tokens = [] 816 self._index = 0 817 self._curr = None 818 self._next = None 819 self._prev = None 820 self._prev_comments = None 821 822 def parse( 823 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 824 ) -> t.List[t.Optional[exp.Expression]]: 825 """ 826 Parses a list of tokens and returns a list of syntax trees, one tree 827 per parsed SQL statement. 828 829 Args: 830 raw_tokens: the list of tokens. 831 sql: the original SQL string, used to produce helpful debug messages. 832 833 Returns: 834 The list of syntax trees. 835 """ 836 return self._parse( 837 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 838 ) 839 840 def parse_into( 841 self, 842 expression_types: exp.IntoType, 843 raw_tokens: t.List[Token], 844 sql: t.Optional[str] = None, 845 ) -> t.List[t.Optional[exp.Expression]]: 846 """ 847 Parses a list of tokens into a given Expression type. If a collection of Expression 848 types is given instead, this method will try to parse the token list into each one 849 of them, stopping at the first for which the parsing succeeds. 850 851 Args: 852 expression_types: the expression type(s) to try and parse the token list into. 853 raw_tokens: the list of tokens. 854 sql: the original SQL string, used to produce helpful debug messages. 855 856 Returns: 857 The target Expression. 
858 """ 859 errors = [] 860 for expression_type in ensure_collection(expression_types): 861 parser = self.EXPRESSION_PARSERS.get(expression_type) 862 if not parser: 863 raise TypeError(f"No parser registered for {expression_type}") 864 try: 865 return self._parse(parser, raw_tokens, sql) 866 except ParseError as e: 867 e.errors[0]["into_expression"] = expression_type 868 errors.append(e) 869 raise ParseError( 870 f"Failed to parse into {expression_types}", 871 errors=merge_errors(errors), 872 ) from errors[-1] 873 874 def _parse( 875 self, 876 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 877 raw_tokens: t.List[Token], 878 sql: t.Optional[str] = None, 879 ) -> t.List[t.Optional[exp.Expression]]: 880 self.reset() 881 self.sql = sql or "" 882 total = len(raw_tokens) 883 chunks: t.List[t.List[Token]] = [[]] 884 885 for i, token in enumerate(raw_tokens): 886 if token.token_type == TokenType.SEMICOLON: 887 if i < total - 1: 888 chunks.append([]) 889 else: 890 chunks[-1].append(token) 891 892 expressions = [] 893 894 for tokens in chunks: 895 self._index = -1 896 self._tokens = tokens 897 self._advance() 898 899 expressions.append(parse_method(self)) 900 901 if self._index < len(self._tokens): 902 self.raise_error("Invalid expression / Unexpected token") 903 904 self.check_errors() 905 906 return expressions 907 908 def check_errors(self) -> None: 909 """ 910 Logs or raises any found errors, depending on the chosen error level setting. 911 """ 912 if self.error_level == ErrorLevel.WARN: 913 for error in self.errors: 914 logger.error(str(error)) 915 elif self.error_level == ErrorLevel.RAISE and self.errors: 916 raise ParseError( 917 concat_messages(self.errors, self.max_errors), 918 errors=merge_errors(self.errors), 919 ) 920 921 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 922 """ 923 Appends an error in the list of recorded errors or raises it, depending on the chosen 924 error level setting. 
925 """ 926 token = token or self._curr or self._prev or Token.string("") 927 start = token.start 928 end = token.end 929 start_context = self.sql[max(start - self.error_message_context, 0) : start] 930 highlight = self.sql[start:end] 931 end_context = self.sql[end : end + self.error_message_context] 932 933 error = ParseError.new( 934 f"{message}. Line {token.line}, Col: {token.col}.\n" 935 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 936 description=message, 937 line=token.line, 938 col=token.col, 939 start_context=start_context, 940 highlight=highlight, 941 end_context=end_context, 942 ) 943 944 if self.error_level == ErrorLevel.IMMEDIATE: 945 raise error 946 947 self.errors.append(error) 948 949 def expression( 950 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 951 ) -> E: 952 """ 953 Creates a new, validated Expression. 954 955 Args: 956 exp_class: the expression class to instantiate. 957 comments: an optional list of comments to attach to the expression. 958 kwargs: the arguments to set for the expression along with their respective values. 959 960 Returns: 961 The target expression. 962 """ 963 instance = exp_class(**kwargs) 964 if self._prev_comments: 965 instance.comments = self._prev_comments 966 self._prev_comments = None 967 if comments: 968 instance.comments = comments 969 self.validate_expression(instance) 970 return instance 971 972 def validate_expression( 973 self, expression: exp.Expression, args: t.Optional[t.List] = None 974 ) -> None: 975 """ 976 Validates an already instantiated expression, making sure that all its mandatory arguments 977 are set. 978 979 Args: 980 expression: the expression to validate. 981 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
982 """ 983 if self.error_level == ErrorLevel.IGNORE: 984 return 985 986 for error_message in expression.error_messages(args): 987 self.raise_error(error_message) 988 989 def _find_sql(self, start: Token, end: Token) -> str: 990 return self.sql[start.start : end.end] 991 992 def _advance(self, times: int = 1) -> None: 993 self._index += times 994 self._curr = seq_get(self._tokens, self._index) 995 self._next = seq_get(self._tokens, self._index + 1) 996 if self._index > 0: 997 self._prev = self._tokens[self._index - 1] 998 self._prev_comments = self._prev.comments 999 else: 1000 self._prev = None 1001 self._prev_comments = None 1002 1003 def _retreat(self, index: int) -> None: 1004 if index != self._index: 1005 self._advance(index - self._index) 1006 1007 def _parse_command(self) -> exp.Command: 1008 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1009 1010 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1011 start = self._prev 1012 exists = self._parse_exists() if allow_exists else None 1013 1014 self._match(TokenType.ON) 1015 1016 kind = self._match_set(self.CREATABLES) and self._prev 1017 1018 if not kind: 1019 return self._parse_as_command(start) 1020 1021 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1022 this = self._parse_user_defined_function(kind=kind.token_type) 1023 elif kind.token_type == TokenType.TABLE: 1024 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1025 elif kind.token_type == TokenType.COLUMN: 1026 this = self._parse_column() 1027 else: 1028 this = self._parse_id_var() 1029 1030 self._match(TokenType.IS) 1031 1032 return self.expression( 1033 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1034 ) 1035 1036 def _parse_statement(self) -> t.Optional[exp.Expression]: 1037 if self._curr is None: 1038 return None 1039 1040 if self._match_set(self.STATEMENT_PARSERS): 1041 return 
self.STATEMENT_PARSERS[self._prev.token_type](self) 1042 1043 if self._match_set(Tokenizer.COMMANDS): 1044 return self._parse_command() 1045 1046 expression = self._parse_expression() 1047 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1048 1049 self._parse_query_modifiers(expression) 1050 return expression 1051 1052 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1053 start = self._prev 1054 temporary = self._match(TokenType.TEMPORARY) 1055 materialized = self._match(TokenType.MATERIALIZED) 1056 kind = self._match_set(self.CREATABLES) and self._prev.text 1057 if not kind: 1058 return self._parse_as_command(start) 1059 1060 return self.expression( 1061 exp.Drop, 1062 exists=self._parse_exists(), 1063 this=self._parse_table(schema=True), 1064 kind=kind, 1065 temporary=temporary, 1066 materialized=materialized, 1067 cascade=self._match(TokenType.CASCADE), 1068 constraints=self._match_text_seq("CONSTRAINTS"), 1069 purge=self._match_text_seq("PURGE"), 1070 ) 1071 1072 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1073 return ( 1074 self._match(TokenType.IF) 1075 and (not not_ or self._match(TokenType.NOT)) 1076 and self._match(TokenType.EXISTS) 1077 ) 1078 1079 def _parse_create(self) -> t.Optional[exp.Expression]: 1080 start = self._prev 1081 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1082 TokenType.OR, TokenType.REPLACE 1083 ) 1084 unique = self._match(TokenType.UNIQUE) 1085 1086 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1087 self._match(TokenType.TABLE) 1088 1089 properties = None 1090 create_token = self._match_set(self.CREATABLES) and self._prev 1091 1092 if not create_token: 1093 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1094 create_token = self._match_set(self.CREATABLES) and self._prev 1095 1096 if not properties or not create_token: 1097 return self._parse_as_command(start) 1098 1099 exists = 
self._parse_exists(not_=True) 1100 this = None 1101 expression = None 1102 indexes = None 1103 no_schema_binding = None 1104 begin = None 1105 1106 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1107 this = self._parse_user_defined_function(kind=create_token.token_type) 1108 temp_properties = self._parse_properties() 1109 if properties and temp_properties: 1110 properties.expressions.extend(temp_properties.expressions) 1111 elif temp_properties: 1112 properties = temp_properties 1113 1114 self._match(TokenType.ALIAS) 1115 begin = self._match(TokenType.BEGIN) 1116 return_ = self._match_text_seq("RETURN") 1117 expression = self._parse_statement() 1118 1119 if return_: 1120 expression = self.expression(exp.Return, this=expression) 1121 elif create_token.token_type == TokenType.INDEX: 1122 this = self._parse_index() 1123 elif create_token.token_type in self.DB_CREATABLES: 1124 table_parts = self._parse_table_parts(schema=True) 1125 1126 # exp.Properties.Location.POST_NAME 1127 if self._match(TokenType.COMMA): 1128 temp_properties = self._parse_properties(before=True) 1129 if properties and temp_properties: 1130 properties.expressions.extend(temp_properties.expressions) 1131 elif temp_properties: 1132 properties = temp_properties 1133 1134 this = self._parse_schema(this=table_parts) 1135 1136 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1137 temp_properties = self._parse_properties() 1138 if properties and temp_properties: 1139 properties.expressions.extend(temp_properties.expressions) 1140 elif temp_properties: 1141 properties = temp_properties 1142 1143 self._match(TokenType.ALIAS) 1144 1145 # exp.Properties.Location.POST_ALIAS 1146 if not ( 1147 self._match(TokenType.SELECT, advance=False) 1148 or self._match(TokenType.WITH, advance=False) 1149 or self._match(TokenType.L_PAREN, advance=False) 1150 ): 1151 temp_properties = self._parse_properties() 1152 if properties and temp_properties: 1153 
properties.expressions.extend(temp_properties.expressions) 1154 elif temp_properties: 1155 properties = temp_properties 1156 1157 expression = self._parse_ddl_select() 1158 1159 if create_token.token_type == TokenType.TABLE: 1160 # exp.Properties.Location.POST_EXPRESSION 1161 temp_properties = self._parse_properties() 1162 if properties and temp_properties: 1163 properties.expressions.extend(temp_properties.expressions) 1164 elif temp_properties: 1165 properties = temp_properties 1166 1167 indexes = [] 1168 while True: 1169 index = self._parse_create_table_index() 1170 1171 # exp.Properties.Location.POST_INDEX 1172 if self._match(TokenType.PARTITION_BY, advance=False): 1173 temp_properties = self._parse_properties() 1174 if properties and temp_properties: 1175 properties.expressions.extend(temp_properties.expressions) 1176 elif temp_properties: 1177 properties = temp_properties 1178 1179 if not index: 1180 break 1181 else: 1182 indexes.append(index) 1183 elif create_token.token_type == TokenType.VIEW: 1184 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1185 no_schema_binding = True 1186 1187 return self.expression( 1188 exp.Create, 1189 this=this, 1190 kind=create_token.text, 1191 replace=replace, 1192 unique=unique, 1193 expression=expression, 1194 exists=exists, 1195 properties=properties, 1196 indexes=indexes, 1197 no_schema_binding=no_schema_binding, 1198 begin=begin, 1199 ) 1200 1201 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1202 self._match(TokenType.COMMA) 1203 1204 # parsers look to _prev for no/dual/default, so need to consume first 1205 self._match_text_seq("NO") 1206 self._match_text_seq("DUAL") 1207 self._match_text_seq("DEFAULT") 1208 1209 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1210 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1211 1212 return None 1213 1214 def _parse_property(self) -> t.Optional[exp.Expression]: 1215 if self._match_texts(self.PROPERTY_PARSERS): 1216 return 
self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1217 1218 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1219 return self._parse_character_set(default=True) 1220 1221 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1222 return self._parse_sortkey(compound=True) 1223 1224 if self._match_text_seq("SQL", "SECURITY"): 1225 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1226 1227 assignment = self._match_pair( 1228 TokenType.VAR, TokenType.EQ, advance=False 1229 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1230 1231 if assignment: 1232 key = self._parse_var_or_string() 1233 self._match(TokenType.EQ) 1234 return self.expression(exp.Property, this=key, value=self._parse_column()) 1235 1236 return None 1237 1238 def _parse_stored(self) -> exp.Expression: 1239 self._match(TokenType.ALIAS) 1240 1241 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1242 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1243 1244 return self.expression( 1245 exp.FileFormatProperty, 1246 this=self.expression( 1247 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1248 ) 1249 if input_format or output_format 1250 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1251 ) 1252 1253 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1254 self._match(TokenType.EQ) 1255 self._match(TokenType.ALIAS) 1256 return self.expression( 1257 exp_class, 1258 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1259 ) 1260 1261 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1262 properties = [] 1263 1264 while True: 1265 if before: 1266 identified_property = self._parse_property_before() 1267 else: 1268 identified_property = self._parse_property() 1269 1270 if not identified_property: 1271 
break 1272 for p in ensure_list(identified_property): 1273 properties.append(p) 1274 1275 if properties: 1276 return self.expression(exp.Properties, expressions=properties) 1277 1278 return None 1279 1280 def _parse_fallback(self, no=False) -> exp.Expression: 1281 self._match_text_seq("FALLBACK") 1282 return self.expression( 1283 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1284 ) 1285 1286 def _parse_volatile_property(self) -> exp.Expression: 1287 if self._index >= 2: 1288 pre_volatile_token = self._tokens[self._index - 2] 1289 else: 1290 pre_volatile_token = None 1291 1292 if pre_volatile_token and pre_volatile_token.token_type in ( 1293 TokenType.CREATE, 1294 TokenType.REPLACE, 1295 TokenType.UNIQUE, 1296 ): 1297 return exp.VolatileProperty() 1298 1299 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1300 1301 def _parse_with_property( 1302 self, 1303 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1304 self._match(TokenType.WITH) 1305 if self._match(TokenType.L_PAREN, advance=False): 1306 return self._parse_wrapped_csv(self._parse_property) 1307 1308 if self._match_text_seq("JOURNAL"): 1309 return self._parse_withjournaltable() 1310 1311 if self._match_text_seq("DATA"): 1312 return self._parse_withdata(no=False) 1313 elif self._match_text_seq("NO", "DATA"): 1314 return self._parse_withdata(no=True) 1315 1316 if not self._next: 1317 return None 1318 1319 return self._parse_withisolatedloading() 1320 1321 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1322 def _parse_definer(self) -> t.Optional[exp.Expression]: 1323 self._match(TokenType.EQ) 1324 1325 user = self._parse_id_var() 1326 self._match(TokenType.PARAMETER) 1327 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1328 1329 if not user or not host: 1330 return None 1331 1332 return exp.DefinerProperty(this=f"{user}@{host}") 1333 1334 def _parse_withjournaltable(self) -> 
exp.Expression: 1335 self._match(TokenType.TABLE) 1336 self._match(TokenType.EQ) 1337 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1338 1339 def _parse_log(self, no=False) -> exp.Expression: 1340 self._match_text_seq("LOG") 1341 return self.expression(exp.LogProperty, no=no) 1342 1343 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1344 before = self._match_text_seq("BEFORE") 1345 self._match_text_seq("JOURNAL") 1346 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1347 1348 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1349 self._match_text_seq("NOT") 1350 self._match_text_seq("LOCAL") 1351 self._match_text_seq("AFTER", "JOURNAL") 1352 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1353 1354 def _parse_checksum(self) -> exp.Expression: 1355 self._match_text_seq("CHECKSUM") 1356 self._match(TokenType.EQ) 1357 1358 on = None 1359 if self._match(TokenType.ON): 1360 on = True 1361 elif self._match_text_seq("OFF"): 1362 on = False 1363 default = self._match(TokenType.DEFAULT) 1364 1365 return self.expression( 1366 exp.ChecksumProperty, 1367 on=on, 1368 default=default, 1369 ) 1370 1371 def _parse_freespace(self) -> exp.Expression: 1372 self._match_text_seq("FREESPACE") 1373 self._match(TokenType.EQ) 1374 return self.expression( 1375 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1376 ) 1377 1378 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1379 self._match_text_seq("MERGEBLOCKRATIO") 1380 if self._match(TokenType.EQ): 1381 return self.expression( 1382 exp.MergeBlockRatioProperty, 1383 this=self._parse_number(), 1384 percent=self._match(TokenType.PERCENT), 1385 ) 1386 else: 1387 return self.expression( 1388 exp.MergeBlockRatioProperty, 1389 no=no, 1390 default=default, 1391 ) 1392 1393 def _parse_datablocksize(self, default=None) -> 
exp.Expression: 1394 if default: 1395 self._match_text_seq("DATABLOCKSIZE") 1396 return self.expression(exp.DataBlocksizeProperty, default=True) 1397 elif self._match_texts(("MIN", "MINIMUM")): 1398 self._match_text_seq("DATABLOCKSIZE") 1399 return self.expression(exp.DataBlocksizeProperty, min=True) 1400 elif self._match_texts(("MAX", "MAXIMUM")): 1401 self._match_text_seq("DATABLOCKSIZE") 1402 return self.expression(exp.DataBlocksizeProperty, min=False) 1403 1404 self._match_text_seq("DATABLOCKSIZE") 1405 self._match(TokenType.EQ) 1406 size = self._parse_number() 1407 units = None 1408 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1409 units = self._prev.text 1410 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1411 1412 def _parse_blockcompression(self) -> exp.Expression: 1413 self._match_text_seq("BLOCKCOMPRESSION") 1414 self._match(TokenType.EQ) 1415 always = self._match_text_seq("ALWAYS") 1416 manual = self._match_text_seq("MANUAL") 1417 never = self._match_text_seq("NEVER") 1418 default = self._match_text_seq("DEFAULT") 1419 autotemp = None 1420 if self._match_text_seq("AUTOTEMP"): 1421 autotemp = self._parse_schema() 1422 1423 return self.expression( 1424 exp.BlockCompressionProperty, 1425 always=always, 1426 manual=manual, 1427 never=never, 1428 default=default, 1429 autotemp=autotemp, 1430 ) 1431 1432 def _parse_withisolatedloading(self) -> exp.Expression: 1433 no = self._match_text_seq("NO") 1434 concurrent = self._match_text_seq("CONCURRENT") 1435 self._match_text_seq("ISOLATED", "LOADING") 1436 for_all = self._match_text_seq("FOR", "ALL") 1437 for_insert = self._match_text_seq("FOR", "INSERT") 1438 for_none = self._match_text_seq("FOR", "NONE") 1439 return self.expression( 1440 exp.IsolatedLoadingProperty, 1441 no=no, 1442 concurrent=concurrent, 1443 for_all=for_all, 1444 for_insert=for_insert, 1445 for_none=for_none, 1446 ) 1447 1448 def _parse_locking(self) -> exp.Expression: 1449 if self._match(TokenType.TABLE): 
1450 kind = "TABLE" 1451 elif self._match(TokenType.VIEW): 1452 kind = "VIEW" 1453 elif self._match(TokenType.ROW): 1454 kind = "ROW" 1455 elif self._match_text_seq("DATABASE"): 1456 kind = "DATABASE" 1457 else: 1458 kind = None 1459 1460 if kind in ("DATABASE", "TABLE", "VIEW"): 1461 this = self._parse_table_parts() 1462 else: 1463 this = None 1464 1465 if self._match(TokenType.FOR): 1466 for_or_in = "FOR" 1467 elif self._match(TokenType.IN): 1468 for_or_in = "IN" 1469 else: 1470 for_or_in = None 1471 1472 if self._match_text_seq("ACCESS"): 1473 lock_type = "ACCESS" 1474 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1475 lock_type = "EXCLUSIVE" 1476 elif self._match_text_seq("SHARE"): 1477 lock_type = "SHARE" 1478 elif self._match_text_seq("READ"): 1479 lock_type = "READ" 1480 elif self._match_text_seq("WRITE"): 1481 lock_type = "WRITE" 1482 elif self._match_text_seq("CHECKSUM"): 1483 lock_type = "CHECKSUM" 1484 else: 1485 lock_type = None 1486 1487 override = self._match_text_seq("OVERRIDE") 1488 1489 return self.expression( 1490 exp.LockingProperty, 1491 this=this, 1492 kind=kind, 1493 for_or_in=for_or_in, 1494 lock_type=lock_type, 1495 override=override, 1496 ) 1497 1498 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1499 if self._match(TokenType.PARTITION_BY): 1500 return self._parse_csv(self._parse_conjunction) 1501 return [] 1502 1503 def _parse_partitioned_by(self) -> exp.Expression: 1504 self._match(TokenType.EQ) 1505 return self.expression( 1506 exp.PartitionedByProperty, 1507 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1508 ) 1509 1510 def _parse_withdata(self, no=False) -> exp.Expression: 1511 if self._match_text_seq("AND", "STATISTICS"): 1512 statistics = True 1513 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1514 statistics = False 1515 else: 1516 statistics = None 1517 1518 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1519 1520 def _parse_noprimaryindex(self) -> 
exp.Expression: 1521 self._match_text_seq("PRIMARY", "INDEX") 1522 return exp.NoPrimaryIndexProperty() 1523 1524 def _parse_oncommit(self) -> exp.Expression: 1525 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1526 return exp.OnCommitProperty() 1527 1528 def _parse_distkey(self) -> exp.Expression: 1529 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1530 1531 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1532 table = self._parse_table(schema=True) 1533 options = [] 1534 while self._match_texts(("INCLUDING", "EXCLUDING")): 1535 this = self._prev.text.upper() 1536 id_var = self._parse_id_var() 1537 1538 if not id_var: 1539 return None 1540 1541 options.append( 1542 self.expression( 1543 exp.Property, 1544 this=this, 1545 value=exp.Var(this=id_var.this.upper()), 1546 ) 1547 ) 1548 return self.expression(exp.LikeProperty, this=table, expressions=options) 1549 1550 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1551 return self.expression( 1552 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1553 ) 1554 1555 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1556 self._match(TokenType.EQ) 1557 return self.expression( 1558 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1559 ) 1560 1561 def _parse_returns(self) -> exp.Expression: 1562 value: t.Optional[exp.Expression] 1563 is_table = self._match(TokenType.TABLE) 1564 1565 if is_table: 1566 if self._match(TokenType.LT): 1567 value = self.expression( 1568 exp.Schema, 1569 this="TABLE", 1570 expressions=self._parse_csv(self._parse_struct_kwargs), 1571 ) 1572 if not self._match(TokenType.GT): 1573 self.raise_error("Expecting >") 1574 else: 1575 value = self._parse_schema(exp.Var(this="TABLE")) 1576 else: 1577 value = self._parse_types() 1578 1579 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1580 1581 def _parse_temporary(self, 
global_=False) -> exp.Expression: 1582 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1583 return self.expression(exp.TemporaryProperty, global_=global_) 1584 1585 def _parse_describe(self) -> exp.Expression: 1586 kind = self._match_set(self.CREATABLES) and self._prev.text 1587 this = self._parse_table() 1588 1589 return self.expression(exp.Describe, this=this, kind=kind) 1590 1591 def _parse_insert(self) -> exp.Expression: 1592 overwrite = self._match(TokenType.OVERWRITE) 1593 local = self._match(TokenType.LOCAL) 1594 alternative = None 1595 1596 if self._match_text_seq("DIRECTORY"): 1597 this: t.Optional[exp.Expression] = self.expression( 1598 exp.Directory, 1599 this=self._parse_var_or_string(), 1600 local=local, 1601 row_format=self._parse_row_format(match_row=True), 1602 ) 1603 else: 1604 if self._match(TokenType.OR): 1605 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1606 1607 self._match(TokenType.INTO) 1608 self._match(TokenType.TABLE) 1609 this = self._parse_table(schema=True) 1610 1611 return self.expression( 1612 exp.Insert, 1613 this=this, 1614 exists=self._parse_exists(), 1615 partition=self._parse_partition(), 1616 expression=self._parse_ddl_select(), 1617 conflict=self._parse_on_conflict(), 1618 returning=self._parse_returning(), 1619 overwrite=overwrite, 1620 alternative=alternative, 1621 ) 1622 1623 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1624 conflict = self._match_text_seq("ON", "CONFLICT") 1625 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1626 1627 if not (conflict or duplicate): 1628 return None 1629 1630 nothing = None 1631 expressions = None 1632 key = None 1633 constraint = None 1634 1635 if conflict: 1636 if self._match_text_seq("ON", "CONSTRAINT"): 1637 constraint = self._parse_id_var() 1638 else: 1639 key = self._parse_csv(self._parse_value) 1640 1641 self._match_text_seq("DO") 1642 if self._match_text_seq("NOTHING"): 1643 nothing = True 1644 else: 1645 
self._match(TokenType.UPDATE) 1646 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1647 1648 return self.expression( 1649 exp.OnConflict, 1650 duplicate=duplicate, 1651 expressions=expressions, 1652 nothing=nothing, 1653 key=key, 1654 constraint=constraint, 1655 ) 1656 1657 def _parse_returning(self) -> t.Optional[exp.Expression]: 1658 if not self._match(TokenType.RETURNING): 1659 return None 1660 1661 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1662 1663 def _parse_row(self) -> t.Optional[exp.Expression]: 1664 if not self._match(TokenType.FORMAT): 1665 return None 1666 return self._parse_row_format() 1667 1668 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1669 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1670 return None 1671 1672 if self._match_text_seq("SERDE"): 1673 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1674 1675 self._match_text_seq("DELIMITED") 1676 1677 kwargs = {} 1678 1679 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1680 kwargs["fields"] = self._parse_string() 1681 if self._match_text_seq("ESCAPED", "BY"): 1682 kwargs["escaped"] = self._parse_string() 1683 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1684 kwargs["collection_items"] = self._parse_string() 1685 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1686 kwargs["map_keys"] = self._parse_string() 1687 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1688 kwargs["lines"] = self._parse_string() 1689 if self._match_text_seq("NULL", "DEFINED", "AS"): 1690 kwargs["null"] = self._parse_string() 1691 1692 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1693 1694 def _parse_load_data(self) -> exp.Expression: 1695 local = self._match(TokenType.LOCAL) 1696 self._match_text_seq("INPATH") 1697 inpath = self._parse_string() 1698 overwrite = 
self._match(TokenType.OVERWRITE) 1699 self._match_pair(TokenType.INTO, TokenType.TABLE) 1700 1701 return self.expression( 1702 exp.LoadData, 1703 this=self._parse_table(schema=True), 1704 local=local, 1705 overwrite=overwrite, 1706 inpath=inpath, 1707 partition=self._parse_partition(), 1708 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1709 serde=self._match_text_seq("SERDE") and self._parse_string(), 1710 ) 1711 1712 def _parse_delete(self) -> exp.Expression: 1713 self._match(TokenType.FROM) 1714 1715 return self.expression( 1716 exp.Delete, 1717 this=self._parse_table(), 1718 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1719 where=self._parse_where(), 1720 returning=self._parse_returning(), 1721 ) 1722 1723 def _parse_update(self) -> exp.Expression: 1724 return self.expression( 1725 exp.Update, 1726 **{ # type: ignore 1727 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1728 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1729 "from": self._parse_from(), 1730 "where": self._parse_where(), 1731 "returning": self._parse_returning(), 1732 }, 1733 ) 1734 1735 def _parse_uncache(self) -> exp.Expression: 1736 if not self._match(TokenType.TABLE): 1737 self.raise_error("Expecting TABLE after UNCACHE") 1738 1739 return self.expression( 1740 exp.Uncache, 1741 exists=self._parse_exists(), 1742 this=self._parse_table(schema=True), 1743 ) 1744 1745 def _parse_cache(self) -> exp.Expression: 1746 lazy = self._match(TokenType.LAZY) 1747 self._match(TokenType.TABLE) 1748 table = self._parse_table(schema=True) 1749 options = [] 1750 1751 if self._match(TokenType.OPTIONS): 1752 self._match_l_paren() 1753 k = self._parse_string() 1754 self._match(TokenType.EQ) 1755 v = self._parse_string() 1756 options = [k, v] 1757 self._match_r_paren() 1758 1759 self._match(TokenType.ALIAS) 1760 return self.expression( 1761 exp.Cache, 1762 this=table, 1763 lazy=lazy, 1764 
options=options, 1765 expression=self._parse_select(nested=True), 1766 ) 1767 1768 def _parse_partition(self) -> t.Optional[exp.Expression]: 1769 if not self._match(TokenType.PARTITION): 1770 return None 1771 1772 return self.expression( 1773 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1774 ) 1775 1776 def _parse_value(self) -> exp.Expression: 1777 if self._match(TokenType.L_PAREN): 1778 expressions = self._parse_csv(self._parse_conjunction) 1779 self._match_r_paren() 1780 return self.expression(exp.Tuple, expressions=expressions) 1781 1782 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1783 # Source: https://prestodb.io/docs/current/sql/values.html 1784 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1785 1786 def _parse_select( 1787 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1788 ) -> t.Optional[exp.Expression]: 1789 cte = self._parse_with() 1790 if cte: 1791 this = self._parse_statement() 1792 1793 if not this: 1794 self.raise_error("Failed to parse any statement following CTE") 1795 return cte 1796 1797 if "with" in this.arg_types: 1798 this.set("with", cte) 1799 else: 1800 self.raise_error(f"{this.key} does not support CTE") 1801 this = cte 1802 elif self._match(TokenType.SELECT): 1803 comments = self._prev_comments 1804 1805 kind = ( 1806 self._match(TokenType.ALIAS) 1807 and self._match_texts(("STRUCT", "VALUE")) 1808 and self._prev.text 1809 ) 1810 hint = self._parse_hint() 1811 all_ = self._match(TokenType.ALL) 1812 distinct = self._match(TokenType.DISTINCT) 1813 1814 if distinct: 1815 distinct = self.expression( 1816 exp.Distinct, 1817 on=self._parse_value() if self._match(TokenType.ON) else None, 1818 ) 1819 1820 if all_ and distinct: 1821 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1822 1823 limit = self._parse_limit(top=True) 1824 expressions = self._parse_csv(self._parse_expression) 1825 1826 this = 
self.expression( 1827 exp.Select, 1828 kind=kind, 1829 hint=hint, 1830 distinct=distinct, 1831 expressions=expressions, 1832 limit=limit, 1833 ) 1834 this.comments = comments 1835 1836 into = self._parse_into() 1837 if into: 1838 this.set("into", into) 1839 1840 from_ = self._parse_from() 1841 if from_: 1842 this.set("from", from_) 1843 1844 self._parse_query_modifiers(this) 1845 elif (table or nested) and self._match(TokenType.L_PAREN): 1846 this = self._parse_table() if table else self._parse_select(nested=True) 1847 self._parse_query_modifiers(this) 1848 this = self._parse_set_operations(this) 1849 self._match_r_paren() 1850 1851 # early return so that subquery unions aren't parsed again 1852 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1853 # Union ALL should be a property of the top select node, not the subquery 1854 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1855 elif self._match(TokenType.VALUES): 1856 this = self.expression( 1857 exp.Values, 1858 expressions=self._parse_csv(self._parse_value), 1859 alias=self._parse_table_alias(), 1860 ) 1861 else: 1862 this = None 1863 1864 return self._parse_set_operations(this) 1865 1866 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1867 if not skip_with_token and not self._match(TokenType.WITH): 1868 return None 1869 1870 comments = self._prev_comments 1871 recursive = self._match(TokenType.RECURSIVE) 1872 1873 expressions = [] 1874 while True: 1875 expressions.append(self._parse_cte()) 1876 1877 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1878 break 1879 else: 1880 self._match(TokenType.WITH) 1881 1882 return self.expression( 1883 exp.With, comments=comments, expressions=expressions, recursive=recursive 1884 ) 1885 1886 def _parse_cte(self) -> exp.Expression: 1887 alias = self._parse_table_alias() 1888 if not alias or not alias.this: 1889 self.raise_error("Expected CTE to have alias") 1890 1891 self._match(TokenType.ALIAS) 1892 
1893 return self.expression( 1894 exp.CTE, 1895 this=self._parse_wrapped(self._parse_statement), 1896 alias=alias, 1897 ) 1898 1899 def _parse_table_alias( 1900 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1901 ) -> t.Optional[exp.Expression]: 1902 any_token = self._match(TokenType.ALIAS) 1903 alias = ( 1904 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1905 or self._parse_string_as_identifier() 1906 ) 1907 1908 index = self._index 1909 if self._match(TokenType.L_PAREN): 1910 columns = self._parse_csv(self._parse_function_parameter) 1911 self._match_r_paren() if columns else self._retreat(index) 1912 else: 1913 columns = None 1914 1915 if not alias and not columns: 1916 return None 1917 1918 return self.expression(exp.TableAlias, this=alias, columns=columns) 1919 1920 def _parse_subquery( 1921 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1922 ) -> exp.Expression: 1923 return self.expression( 1924 exp.Subquery, 1925 this=this, 1926 pivots=self._parse_pivots(), 1927 alias=self._parse_table_alias() if parse_alias else None, 1928 ) 1929 1930 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1931 if not isinstance(this, self.MODIFIABLES): 1932 return 1933 1934 table = isinstance(this, exp.Table) 1935 1936 while True: 1937 join = self._parse_join() 1938 if join: 1939 this.append("joins", join) 1940 1941 lateral = None 1942 if not join: 1943 lateral = self._parse_lateral() 1944 if lateral: 1945 this.append("laterals", lateral) 1946 1947 comma = None if table else self._match(TokenType.COMMA) 1948 if comma: 1949 this.args["from"].append("expressions", self._parse_table()) 1950 1951 if not (lateral or join or comma): 1952 break 1953 1954 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1955 expression = parser(self) 1956 1957 if expression: 1958 this.set(key, expression) 1959 1960 def _parse_hint(self) -> t.Optional[exp.Expression]: 1961 if 
self._match(TokenType.HINT): 1962 hints = self._parse_csv(self._parse_function) 1963 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1964 self.raise_error("Expected */ after HINT") 1965 return self.expression(exp.Hint, expressions=hints) 1966 1967 return None 1968 1969 def _parse_into(self) -> t.Optional[exp.Expression]: 1970 if not self._match(TokenType.INTO): 1971 return None 1972 1973 temp = self._match(TokenType.TEMPORARY) 1974 unlogged = self._match(TokenType.UNLOGGED) 1975 self._match(TokenType.TABLE) 1976 1977 return self.expression( 1978 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1979 ) 1980 1981 def _parse_from(self) -> t.Optional[exp.Expression]: 1982 if not self._match(TokenType.FROM): 1983 return None 1984 1985 return self.expression( 1986 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1987 ) 1988 1989 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1990 if not self._match(TokenType.MATCH_RECOGNIZE): 1991 return None 1992 1993 self._match_l_paren() 1994 1995 partition = self._parse_partition_by() 1996 order = self._parse_order() 1997 measures = ( 1998 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 1999 ) 2000 2001 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2002 rows = exp.Var(this="ONE ROW PER MATCH") 2003 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2004 text = "ALL ROWS PER MATCH" 2005 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2006 text += f" SHOW EMPTY MATCHES" 2007 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2008 text += f" OMIT EMPTY MATCHES" 2009 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2010 text += f" WITH UNMATCHED ROWS" 2011 rows = exp.Var(this=text) 2012 else: 2013 rows = None 2014 2015 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2016 text = "AFTER MATCH SKIP" 2017 if self._match_text_seq("PAST", "LAST", "ROW"): 2018 text += f" PAST 
LAST ROW" 2019 elif self._match_text_seq("TO", "NEXT", "ROW"): 2020 text += f" TO NEXT ROW" 2021 elif self._match_text_seq("TO", "FIRST"): 2022 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2023 elif self._match_text_seq("TO", "LAST"): 2024 text += f" TO LAST {self._advance_any().text}" # type: ignore 2025 after = exp.Var(this=text) 2026 else: 2027 after = None 2028 2029 if self._match_text_seq("PATTERN"): 2030 self._match_l_paren() 2031 2032 if not self._curr: 2033 self.raise_error("Expecting )", self._curr) 2034 2035 paren = 1 2036 start = self._curr 2037 2038 while self._curr and paren > 0: 2039 if self._curr.token_type == TokenType.L_PAREN: 2040 paren += 1 2041 if self._curr.token_type == TokenType.R_PAREN: 2042 paren -= 1 2043 end = self._prev 2044 self._advance() 2045 if paren > 0: 2046 self.raise_error("Expecting )", self._curr) 2047 pattern = exp.Var(this=self._find_sql(start, end)) 2048 else: 2049 pattern = None 2050 2051 define = ( 2052 self._parse_csv( 2053 lambda: self.expression( 2054 exp.Alias, 2055 alias=self._parse_id_var(any_token=True), 2056 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2057 ) 2058 ) 2059 if self._match_text_seq("DEFINE") 2060 else None 2061 ) 2062 2063 self._match_r_paren() 2064 2065 return self.expression( 2066 exp.MatchRecognize, 2067 partition_by=partition, 2068 order=order, 2069 measures=measures, 2070 rows=rows, 2071 after=after, 2072 pattern=pattern, 2073 define=define, 2074 alias=self._parse_table_alias(), 2075 ) 2076 2077 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2078 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2079 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2080 2081 if outer_apply or cross_apply: 2082 this = self._parse_select(table=True) 2083 view = None 2084 outer = not cross_apply 2085 elif self._match(TokenType.LATERAL): 2086 this = self._parse_select(table=True) 2087 view = self._match(TokenType.VIEW) 2088 outer = 
self._match(TokenType.OUTER) 2089 else: 2090 return None 2091 2092 if not this: 2093 this = self._parse_function() or self._parse_id_var(any_token=False) 2094 while self._match(TokenType.DOT): 2095 this = exp.Dot( 2096 this=this, 2097 expression=self._parse_function() or self._parse_id_var(any_token=False), 2098 ) 2099 2100 table_alias: t.Optional[exp.Expression] 2101 2102 if view: 2103 table = self._parse_id_var(any_token=False) 2104 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2105 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2106 else: 2107 table_alias = self._parse_table_alias() 2108 2109 expression = self.expression( 2110 exp.Lateral, 2111 this=this, 2112 view=view, 2113 outer=outer, 2114 alias=table_alias, 2115 ) 2116 2117 return expression 2118 2119 def _parse_join_side_and_kind( 2120 self, 2121 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2122 return ( 2123 self._match(TokenType.NATURAL) and self._prev, 2124 self._match_set(self.JOIN_SIDES) and self._prev, 2125 self._match_set(self.JOIN_KINDS) and self._prev, 2126 ) 2127 2128 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2129 index = self._index 2130 natural, side, kind = self._parse_join_side_and_kind() 2131 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2132 join = self._match(TokenType.JOIN) 2133 2134 if not skip_join_token and not join: 2135 self._retreat(index) 2136 kind = None 2137 natural = None 2138 side = None 2139 2140 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2141 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2142 2143 if not skip_join_token and not join and not outer_apply and not cross_apply: 2144 return None 2145 2146 if outer_apply: 2147 side = Token(TokenType.LEFT, "LEFT") 2148 2149 kwargs: t.Dict[ 2150 str, t.Optional[exp.Expression] | bool | str | 
t.List[t.Optional[exp.Expression]] 2151 ] = {"this": self._parse_table()} 2152 2153 if natural: 2154 kwargs["natural"] = True 2155 if side: 2156 kwargs["side"] = side.text 2157 if kind: 2158 kwargs["kind"] = kind.text 2159 if hint: 2160 kwargs["hint"] = hint 2161 2162 if self._match(TokenType.ON): 2163 kwargs["on"] = self._parse_conjunction() 2164 elif self._match(TokenType.USING): 2165 kwargs["using"] = self._parse_wrapped_id_vars() 2166 2167 return self.expression(exp.Join, **kwargs) # type: ignore 2168 2169 def _parse_index(self) -> exp.Expression: 2170 index = self._parse_id_var() 2171 self._match(TokenType.ON) 2172 self._match(TokenType.TABLE) # hive 2173 2174 return self.expression( 2175 exp.Index, 2176 this=index, 2177 table=self.expression(exp.Table, this=self._parse_id_var()), 2178 columns=self._parse_expression(), 2179 ) 2180 2181 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2182 unique = self._match(TokenType.UNIQUE) 2183 primary = self._match_text_seq("PRIMARY") 2184 amp = self._match_text_seq("AMP") 2185 if not self._match(TokenType.INDEX): 2186 return None 2187 index = self._parse_id_var() 2188 columns = None 2189 if self._match(TokenType.L_PAREN, advance=False): 2190 columns = self._parse_wrapped_csv(self._parse_column) 2191 return self.expression( 2192 exp.Index, 2193 this=index, 2194 columns=columns, 2195 unique=unique, 2196 primary=primary, 2197 amp=amp, 2198 ) 2199 2200 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2201 catalog = None 2202 db = None 2203 2204 table = ( 2205 (not schema and self._parse_function()) 2206 or self._parse_id_var(any_token=False) 2207 or self._parse_string_as_identifier() 2208 ) 2209 2210 while self._match(TokenType.DOT): 2211 if catalog: 2212 # This allows nesting the table in arbitrarily many dot expressions if needed 2213 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2214 else: 2215 catalog = db 2216 db = table 2217 table = 
self._parse_id_var() 2218 2219 if not table: 2220 self.raise_error(f"Expected table name but got {self._curr}") 2221 2222 return self.expression( 2223 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2224 ) 2225 2226 def _parse_table( 2227 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2228 ) -> t.Optional[exp.Expression]: 2229 lateral = self._parse_lateral() 2230 2231 if lateral: 2232 return lateral 2233 2234 unnest = self._parse_unnest() 2235 2236 if unnest: 2237 return unnest 2238 2239 values = self._parse_derived_table_values() 2240 2241 if values: 2242 return values 2243 2244 subquery = self._parse_select(table=True) 2245 2246 if subquery: 2247 if not subquery.args.get("pivots"): 2248 subquery.set("pivots", self._parse_pivots()) 2249 return subquery 2250 2251 this = self._parse_table_parts(schema=schema) 2252 2253 if schema: 2254 return self._parse_schema(this=this) 2255 2256 if self.alias_post_tablesample: 2257 table_sample = self._parse_table_sample() 2258 2259 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2260 2261 if alias: 2262 this.set("alias", alias) 2263 2264 if not this.args.get("pivots"): 2265 this.set("pivots", self._parse_pivots()) 2266 2267 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2268 this.set( 2269 "hints", 2270 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2271 ) 2272 self._match_r_paren() 2273 2274 if not self.alias_post_tablesample: 2275 table_sample = self._parse_table_sample() 2276 2277 if table_sample: 2278 table_sample.set("this", this) 2279 this = table_sample 2280 2281 return this 2282 2283 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2284 if not self._match(TokenType.UNNEST): 2285 return None 2286 2287 expressions = self._parse_wrapped_csv(self._parse_type) 2288 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2289 alias = self._parse_table_alias() 2290 
2291 if alias and self.unnest_column_only: 2292 if alias.args.get("columns"): 2293 self.raise_error("Unexpected extra column alias in unnest.") 2294 alias.set("columns", [alias.this]) 2295 alias.set("this", None) 2296 2297 offset = None 2298 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2299 self._match(TokenType.ALIAS) 2300 offset = self._parse_id_var() or exp.Identifier(this="offset") 2301 2302 return self.expression( 2303 exp.Unnest, 2304 expressions=expressions, 2305 ordinality=ordinality, 2306 alias=alias, 2307 offset=offset, 2308 ) 2309 2310 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2311 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2312 if not is_derived and not self._match(TokenType.VALUES): 2313 return None 2314 2315 expressions = self._parse_csv(self._parse_value) 2316 2317 if is_derived: 2318 self._match_r_paren() 2319 2320 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2321 2322 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2323 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2324 as_modifier and self._match_text_seq("USING", "SAMPLE") 2325 ): 2326 return None 2327 2328 bucket_numerator = None 2329 bucket_denominator = None 2330 bucket_field = None 2331 percent = None 2332 rows = None 2333 size = None 2334 seed = None 2335 2336 kind = ( 2337 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2338 ) 2339 method = self._parse_var(tokens=(TokenType.ROW,)) 2340 2341 self._match(TokenType.L_PAREN) 2342 2343 num = self._parse_number() 2344 2345 if self._match(TokenType.BUCKET): 2346 bucket_numerator = self._parse_number() 2347 self._match(TokenType.OUT_OF) 2348 bucket_denominator = bucket_denominator = self._parse_number() 2349 self._match(TokenType.ON) 2350 bucket_field = self._parse_field() 2351 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2352 percent = num 
2353 elif self._match(TokenType.ROWS): 2354 rows = num 2355 else: 2356 size = num 2357 2358 self._match(TokenType.R_PAREN) 2359 2360 if self._match(TokenType.L_PAREN): 2361 method = self._parse_var() 2362 seed = self._match(TokenType.COMMA) and self._parse_number() 2363 self._match_r_paren() 2364 elif self._match_texts(("SEED", "REPEATABLE")): 2365 seed = self._parse_wrapped(self._parse_number) 2366 2367 return self.expression( 2368 exp.TableSample, 2369 method=method, 2370 bucket_numerator=bucket_numerator, 2371 bucket_denominator=bucket_denominator, 2372 bucket_field=bucket_field, 2373 percent=percent, 2374 rows=rows, 2375 size=size, 2376 seed=seed, 2377 kind=kind, 2378 ) 2379 2380 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2381 return list(iter(self._parse_pivot, None)) 2382 2383 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2384 index = self._index 2385 2386 if self._match(TokenType.PIVOT): 2387 unpivot = False 2388 elif self._match(TokenType.UNPIVOT): 2389 unpivot = True 2390 else: 2391 return None 2392 2393 expressions = [] 2394 field = None 2395 2396 if not self._match(TokenType.L_PAREN): 2397 self._retreat(index) 2398 return None 2399 2400 if unpivot: 2401 expressions = self._parse_csv(self._parse_column) 2402 else: 2403 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2404 2405 if not expressions: 2406 self.raise_error("Failed to parse PIVOT's aggregation list") 2407 2408 if not self._match(TokenType.FOR): 2409 self.raise_error("Expecting FOR") 2410 2411 value = self._parse_column() 2412 2413 if not self._match(TokenType.IN): 2414 self.raise_error("Expecting IN") 2415 2416 field = self._parse_in(value) 2417 2418 self._match_r_paren() 2419 2420 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2421 2422 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2423 pivot.set("alias", self._parse_table_alias()) 2424 2425 if not unpivot: 2426 
names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2427 2428 columns: t.List[exp.Expression] = [] 2429 for col in pivot.args["field"].expressions: 2430 for name in names: 2431 if self.PREFIXED_PIVOT_COLUMNS: 2432 name = f"{name}_{col.alias_or_name}" if name else col.alias_or_name 2433 else: 2434 name = f"{col.alias_or_name}_{name}" if name else col.alias_or_name 2435 2436 columns.append(exp.to_identifier(name, quoted=self.QUOTED_PIVOT_COLUMNS)) 2437 2438 pivot.set("columns", columns) 2439 2440 return pivot 2441 2442 def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]: 2443 return [agg.alias for agg in pivot_columns] 2444 2445 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2446 if not skip_where_token and not self._match(TokenType.WHERE): 2447 return None 2448 2449 return self.expression( 2450 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2451 ) 2452 2453 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2454 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2455 return None 2456 2457 elements = defaultdict(list) 2458 2459 while True: 2460 expressions = self._parse_csv(self._parse_conjunction) 2461 if expressions: 2462 elements["expressions"].extend(expressions) 2463 2464 grouping_sets = self._parse_grouping_sets() 2465 if grouping_sets: 2466 elements["grouping_sets"].extend(grouping_sets) 2467 2468 rollup = None 2469 cube = None 2470 2471 with_ = self._match(TokenType.WITH) 2472 if self._match(TokenType.ROLLUP): 2473 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2474 elements["rollup"].extend(ensure_list(rollup)) 2475 2476 if self._match(TokenType.CUBE): 2477 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2478 elements["cube"].extend(ensure_list(cube)) 2479 2480 if not (expressions or grouping_sets or rollup or cube): 2481 break 2482 2483 return 
self.expression(exp.Group, **elements) # type: ignore 2484 2485 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2486 if not self._match(TokenType.GROUPING_SETS): 2487 return None 2488 2489 return self._parse_wrapped_csv(self._parse_grouping_set) 2490 2491 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2492 if self._match(TokenType.L_PAREN): 2493 grouping_set = self._parse_csv(self._parse_column) 2494 self._match_r_paren() 2495 return self.expression(exp.Tuple, expressions=grouping_set) 2496 2497 return self._parse_column() 2498 2499 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2500 if not skip_having_token and not self._match(TokenType.HAVING): 2501 return None 2502 return self.expression(exp.Having, this=self._parse_conjunction()) 2503 2504 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2505 if not self._match(TokenType.QUALIFY): 2506 return None 2507 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2508 2509 def _parse_order( 2510 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2511 ) -> t.Optional[exp.Expression]: 2512 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2513 return this 2514 2515 return self.expression( 2516 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2517 ) 2518 2519 def _parse_sort( 2520 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2521 ) -> t.Optional[exp.Expression]: 2522 if not self._match(token_type): 2523 return None 2524 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2525 2526 def _parse_ordered(self) -> exp.Expression: 2527 this = self._parse_conjunction() 2528 self._match(TokenType.ASC) 2529 is_desc = self._match(TokenType.DESC) 2530 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2531 is_nulls_last = self._match(TokenType.NULLS_LAST) 2532 desc = is_desc or False 2533 asc = not desc 2534 
nulls_first = is_nulls_first or False 2535 explicitly_null_ordered = is_nulls_first or is_nulls_last 2536 if ( 2537 not explicitly_null_ordered 2538 and ( 2539 (asc and self.null_ordering == "nulls_are_small") 2540 or (desc and self.null_ordering != "nulls_are_small") 2541 ) 2542 and self.null_ordering != "nulls_are_last" 2543 ): 2544 nulls_first = True 2545 2546 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2547 2548 def _parse_limit( 2549 self, this: t.Optional[exp.Expression] = None, top: bool = False 2550 ) -> t.Optional[exp.Expression]: 2551 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2552 limit_paren = self._match(TokenType.L_PAREN) 2553 limit_exp = self.expression( 2554 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2555 ) 2556 2557 if limit_paren: 2558 self._match_r_paren() 2559 2560 return limit_exp 2561 2562 if self._match(TokenType.FETCH): 2563 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2564 direction = self._prev.text if direction else "FIRST" 2565 2566 count = self._parse_number() 2567 percent = self._match(TokenType.PERCENT) 2568 2569 self._match_set((TokenType.ROW, TokenType.ROWS)) 2570 2571 only = self._match(TokenType.ONLY) 2572 with_ties = self._match_text_seq("WITH", "TIES") 2573 2574 if only and with_ties: 2575 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2576 2577 return self.expression( 2578 exp.Fetch, 2579 direction=direction, 2580 count=count, 2581 percent=percent, 2582 with_ties=with_ties, 2583 ) 2584 2585 return this 2586 2587 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2588 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2589 return this 2590 2591 count = self._parse_number() 2592 self._match_set((TokenType.ROW, TokenType.ROWS)) 2593 return self.expression(exp.Offset, this=this, expression=count) 2594 2595 def _parse_lock(self) -> 
t.Optional[exp.Expression]: 2596 if self._match_text_seq("FOR", "UPDATE"): 2597 return self.expression(exp.Lock, update=True) 2598 if self._match_text_seq("FOR", "SHARE"): 2599 return self.expression(exp.Lock, update=False) 2600 2601 return None 2602 2603 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2604 if not self._match_set(self.SET_OPERATIONS): 2605 return this 2606 2607 token_type = self._prev.token_type 2608 2609 if token_type == TokenType.UNION: 2610 expression = exp.Union 2611 elif token_type == TokenType.EXCEPT: 2612 expression = exp.Except 2613 else: 2614 expression = exp.Intersect 2615 2616 return self.expression( 2617 expression, 2618 this=this, 2619 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2620 expression=self._parse_set_operations(self._parse_select(nested=True)), 2621 ) 2622 2623 def _parse_expression(self) -> t.Optional[exp.Expression]: 2624 return self._parse_alias(self._parse_conjunction()) 2625 2626 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2627 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2628 2629 def _parse_equality(self) -> t.Optional[exp.Expression]: 2630 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2631 2632 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2633 return self._parse_tokens(self._parse_range, self.COMPARISON) 2634 2635 def _parse_range(self) -> t.Optional[exp.Expression]: 2636 this = self._parse_bitwise() 2637 negate = self._match(TokenType.NOT) 2638 2639 if self._match_set(self.RANGE_PARSERS): 2640 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2641 if not expression: 2642 return this 2643 2644 this = expression 2645 elif self._match(TokenType.ISNULL): 2646 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2647 2648 # Postgres supports ISNULL and NOTNULL for conditions. 
2649 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2650 if self._match(TokenType.NOTNULL): 2651 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2652 this = self.expression(exp.Not, this=this) 2653 2654 if negate: 2655 this = self.expression(exp.Not, this=this) 2656 2657 if self._match(TokenType.IS): 2658 this = self._parse_is(this) 2659 2660 return this 2661 2662 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2663 index = self._index - 1 2664 negate = self._match(TokenType.NOT) 2665 if self._match(TokenType.DISTINCT_FROM): 2666 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2667 return self.expression(klass, this=this, expression=self._parse_expression()) 2668 2669 expression = self._parse_null() or self._parse_boolean() 2670 if not expression: 2671 self._retreat(index) 2672 return None 2673 2674 this = self.expression(exp.Is, this=this, expression=expression) 2675 return self.expression(exp.Not, this=this) if negate else this 2676 2677 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2678 unnest = self._parse_unnest() 2679 if unnest: 2680 this = self.expression(exp.In, this=this, unnest=unnest) 2681 elif self._match(TokenType.L_PAREN): 2682 expressions = self._parse_csv(self._parse_select_or_expression) 2683 2684 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2685 this = self.expression(exp.In, this=this, query=expressions[0]) 2686 else: 2687 this = self.expression(exp.In, this=this, expressions=expressions) 2688 2689 self._match_r_paren() 2690 else: 2691 this = self.expression(exp.In, this=this, field=self._parse_field()) 2692 2693 return this 2694 2695 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2696 low = self._parse_bitwise() 2697 self._match(TokenType.AND) 2698 high = self._parse_bitwise() 2699 return self.expression(exp.Between, this=this, low=low, high=high) 2700 2701 def _parse_escape(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2702 if not self._match(TokenType.ESCAPE): 2703 return this 2704 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2705 2706 def _parse_interval(self) -> t.Optional[exp.Expression]: 2707 if not self._match(TokenType.INTERVAL): 2708 return None 2709 2710 this = self._parse_primary() or self._parse_term() 2711 unit = self._parse_function() or self._parse_var() 2712 2713 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2714 # each INTERVAL expression into this canonical form so it's easy to transpile 2715 if this and isinstance(this, exp.Literal): 2716 if this.is_number: 2717 this = exp.Literal.string(this.name) 2718 2719 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2720 parts = this.name.split() 2721 if not unit and len(parts) <= 2: 2722 this = exp.Literal.string(seq_get(parts, 0)) 2723 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2724 2725 return self.expression(exp.Interval, this=this, unit=unit) 2726 2727 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2728 this = self._parse_term() 2729 2730 while True: 2731 if self._match_set(self.BITWISE): 2732 this = self.expression( 2733 self.BITWISE[self._prev.token_type], 2734 this=this, 2735 expression=self._parse_term(), 2736 ) 2737 elif self._match_pair(TokenType.LT, TokenType.LT): 2738 this = self.expression( 2739 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2740 ) 2741 elif self._match_pair(TokenType.GT, TokenType.GT): 2742 this = self.expression( 2743 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2744 ) 2745 else: 2746 break 2747 2748 return this 2749 2750 def _parse_term(self) -> t.Optional[exp.Expression]: 2751 return self._parse_tokens(self._parse_factor, self.TERM) 2752 2753 def _parse_factor(self) -> t.Optional[exp.Expression]: 2754 return self._parse_tokens(self._parse_unary, self.FACTOR) 2755 2756 def 
_parse_unary(self) -> t.Optional[exp.Expression]: 2757 if self._match_set(self.UNARY_PARSERS): 2758 return self.UNARY_PARSERS[self._prev.token_type](self) 2759 return self._parse_at_time_zone(self._parse_type()) 2760 2761 def _parse_type(self) -> t.Optional[exp.Expression]: 2762 interval = self._parse_interval() 2763 if interval: 2764 return interval 2765 2766 index = self._index 2767 data_type = self._parse_types(check_func=True) 2768 this = self._parse_column() 2769 2770 if data_type: 2771 if isinstance(this, exp.Literal): 2772 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2773 if parser: 2774 return parser(self, this, data_type) 2775 return self.expression(exp.Cast, this=this, to=data_type) 2776 if not data_type.args.get("expressions"): 2777 self._retreat(index) 2778 return self._parse_column() 2779 return data_type 2780 2781 return this 2782 2783 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2784 index = self._index 2785 2786 prefix = self._match_text_seq("SYSUDTLIB", ".") 2787 2788 if not self._match_set(self.TYPE_TOKENS): 2789 return None 2790 2791 type_token = self._prev.token_type 2792 2793 if type_token == TokenType.PSEUDO_TYPE: 2794 return self.expression(exp.PseudoType, this=self._prev.text) 2795 2796 nested = type_token in self.NESTED_TYPE_TOKENS 2797 is_struct = type_token == TokenType.STRUCT 2798 expressions = None 2799 maybe_func = False 2800 2801 if self._match(TokenType.L_PAREN): 2802 if is_struct: 2803 expressions = self._parse_csv(self._parse_struct_kwargs) 2804 elif nested: 2805 expressions = self._parse_csv(self._parse_types) 2806 else: 2807 expressions = self._parse_csv(self._parse_conjunction) 2808 2809 if not expressions or not self._match(TokenType.R_PAREN): 2810 self._retreat(index) 2811 return None 2812 2813 maybe_func = True 2814 2815 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2816 this = exp.DataType( 2817 this=exp.DataType.Type.ARRAY, 2818 
expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2819 nested=True, 2820 ) 2821 2822 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2823 this = exp.DataType( 2824 this=exp.DataType.Type.ARRAY, 2825 expressions=[this], 2826 nested=True, 2827 ) 2828 2829 return this 2830 2831 if self._match(TokenType.L_BRACKET): 2832 self._retreat(index) 2833 return None 2834 2835 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2836 if nested and self._match(TokenType.LT): 2837 if is_struct: 2838 expressions = self._parse_csv(self._parse_struct_kwargs) 2839 else: 2840 expressions = self._parse_csv(self._parse_types) 2841 2842 if not self._match(TokenType.GT): 2843 self.raise_error("Expecting >") 2844 2845 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2846 values = self._parse_csv(self._parse_conjunction) 2847 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2848 2849 value: t.Optional[exp.Expression] = None 2850 if type_token in self.TIMESTAMPS: 2851 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2852 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2853 elif ( 2854 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2855 ): 2856 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2857 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2858 if type_token == TokenType.TIME: 2859 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2860 else: 2861 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2862 2863 maybe_func = maybe_func and value is None 2864 2865 if value is None: 2866 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2867 elif type_token == TokenType.INTERVAL: 2868 unit = self._parse_var() 2869 2870 if not unit: 2871 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 
2872 else: 2873 value = self.expression(exp.Interval, unit=unit) 2874 2875 if maybe_func and check_func: 2876 index2 = self._index 2877 peek = self._parse_string() 2878 2879 if not peek: 2880 self._retreat(index) 2881 return None 2882 2883 self._retreat(index2) 2884 2885 if value: 2886 return value 2887 2888 return exp.DataType( 2889 this=exp.DataType.Type[type_token.value.upper()], 2890 expressions=expressions, 2891 nested=nested, 2892 values=values, 2893 prefix=prefix, 2894 ) 2895 2896 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2897 index = self._index 2898 this = self._parse_id_var() 2899 self._match(TokenType.COLON) 2900 data_type = self._parse_types() 2901 2902 if not data_type: 2903 self._retreat(index) 2904 return self._parse_types() 2905 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2906 2907 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2908 if not self._match(TokenType.AT_TIME_ZONE): 2909 return this 2910 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2911 2912 def _parse_column(self) -> t.Optional[exp.Expression]: 2913 this = self._parse_field() 2914 if isinstance(this, exp.Identifier): 2915 this = self.expression(exp.Column, this=this) 2916 elif not this: 2917 return self._parse_bracket(this) 2918 this = self._parse_bracket(this) 2919 2920 while self._match_set(self.COLUMN_OPERATORS): 2921 op_token = self._prev.token_type 2922 op = self.COLUMN_OPERATORS.get(op_token) 2923 2924 if op_token == TokenType.DCOLON: 2925 field = self._parse_types() 2926 if not field: 2927 self.raise_error("Expected type") 2928 elif op: 2929 self._advance() 2930 value = self._prev.text 2931 field = ( 2932 exp.Literal.number(value) 2933 if self._prev.token_type == TokenType.NUMBER 2934 else exp.Literal.string(value) 2935 ) 2936 else: 2937 field = ( 2938 self._parse_star() 2939 or self._parse_function(anonymous=True) 2940 or self._parse_id_var() 2941 ) 2942 
2943 if isinstance(field, exp.Func): 2944 # bigquery allows function calls like x.y.count(...) 2945 # SAFE.SUBSTR(...) 2946 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2947 this = self._replace_columns_with_dots(this) 2948 2949 if op: 2950 this = op(self, this, field) 2951 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2952 this = self.expression( 2953 exp.Column, 2954 this=field, 2955 table=this.this, 2956 db=this.args.get("table"), 2957 catalog=this.args.get("db"), 2958 ) 2959 else: 2960 this = self.expression(exp.Dot, this=this, expression=field) 2961 this = self._parse_bracket(this) 2962 2963 return this 2964 2965 def _parse_primary(self) -> t.Optional[exp.Expression]: 2966 if self._match_set(self.PRIMARY_PARSERS): 2967 token_type = self._prev.token_type 2968 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2969 2970 if token_type == TokenType.STRING: 2971 expressions = [primary] 2972 while self._match(TokenType.STRING): 2973 expressions.append(exp.Literal.string(self._prev.text)) 2974 if len(expressions) > 1: 2975 return self.expression(exp.Concat, expressions=expressions) 2976 return primary 2977 2978 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2979 return exp.Literal.number(f"0.{self._prev.text}") 2980 2981 if self._match(TokenType.L_PAREN): 2982 comments = self._prev_comments 2983 query = self._parse_select() 2984 2985 if query: 2986 expressions = [query] 2987 else: 2988 expressions = self._parse_csv( 2989 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2990 ) 2991 2992 this = seq_get(expressions, 0) 2993 self._parse_query_modifiers(this) 2994 2995 if isinstance(this, exp.Subqueryable): 2996 this = self._parse_set_operations( 2997 self._parse_subquery(this=this, parse_alias=False) 2998 ) 2999 elif len(expressions) > 1: 3000 this = self.expression(exp.Tuple, expressions=expressions) 3001 else: 3002 this = self.expression(exp.Paren, 
this=self._parse_set_operations(this))

            self._match_r_paren()
            comments.extend(self._prev_comments)

            if this and comments:
                this.comments = comments

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current token.

        Handles no-paren functions (e.g. CURRENT_DATE), dialect-specific function
        parsers, subquery predicates (EXISTS/ANY/...), and known vs. anonymous
        functions. Returns None when the current token cannot start a function.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        # Without a following "(", only argument-less (no-paren) functions qualify.
        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function (identifier plus optional column def)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (token followed by a literal); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap a national-character string token in an exp.National node."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter, optionally dot-qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. "(x, y) -> expr") or, failing that, a DISTINCT list,
        SELECT, or plain expression, with optional ORDER/LIMIT/nulls modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs and/or constraints), unless a
        nested SELECT follows, in which case `this` is returned unchanged."""
        index = self._index

        try:
            # Probe for a nested SELECT; the cursor is always restored afterwards.
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type plus any number of column constraints."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Bare identifier with no type and no constraints: not a column def.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with optional sequence options."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse NOT NULL / NOT CASESPECIFIC; returns None for any other NOT form."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named via CONSTRAINT) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema-level constraint; unnamed constraints delegate to
        _parse_unnamed_constraint with the schema-specific allow-list."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint without a CONSTRAINT name, restricted to `constraints` if given."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a bare column constraint or with a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON ... actions, DEFERRABLE, MATCH FULL, ...)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; when `match` is False the keyword is assumed consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with its REFERENCES clause and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint or with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracket/brace expressions after `this`: subscripts, slices, arrays, structs.

        Recurses to support chained subscripts like x[0][1].
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize literal subscripts against the dialect's array index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice if a ":" follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then any window clause."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in either function form IF(a, b, c) or statement form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast over exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT variants into an exp.GroupConcat node."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) or CONVERT(expr USING charset) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: also treat NULL == NULL as a match, per DECODE semantics.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one [KEY] key [:|VALUE] value pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs plus NULL- and key-uniqueness options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

self._match_text_seq("KEYS") 3622 3623 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3624 format_json = self._match_text_seq("FORMAT", "JSON") 3625 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3626 3627 return self.expression( 3628 exp.JSONObject, 3629 expressions=expressions, 3630 null_handling=null_handling, 3631 unique_keys=unique_keys, 3632 return_type=return_type, 3633 format_json=format_json, 3634 encoding=encoding, 3635 ) 3636 3637 def _parse_logarithm(self) -> exp.Expression: 3638 # Default argument order is base, expression 3639 args = self._parse_csv(self._parse_range) 3640 3641 if len(args) > 1: 3642 if not self.LOG_BASE_FIRST: 3643 args.reverse() 3644 return exp.Log.from_arg_list(args) 3645 3646 return self.expression( 3647 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3648 ) 3649 3650 def _parse_match_against(self) -> exp.Expression: 3651 expressions = self._parse_csv(self._parse_column) 3652 3653 self._match_text_seq(")", "AGAINST", "(") 3654 3655 this = self._parse_string() 3656 3657 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3658 modifier = "IN NATURAL LANGUAGE MODE" 3659 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3660 modifier = f"{modifier} WITH QUERY EXPANSION" 3661 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3662 modifier = "IN BOOLEAN MODE" 3663 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3664 modifier = "WITH QUERY EXPANSION" 3665 else: 3666 modifier = None 3667 3668 return self.expression( 3669 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3670 ) 3671 3672 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3673 args = self._parse_csv(self._parse_bitwise) 3674 3675 if self._match(TokenType.IN): 3676 return self.expression( 3677 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3678 ) 3679 3680 if haystack_first: 3681 haystack = seq_get(args, 0) 3682 needle 
= seq_get(args, 1) 3683 else: 3684 needle = seq_get(args, 0) 3685 haystack = seq_get(args, 1) 3686 3687 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3688 3689 self.validate_expression(this, args) 3690 3691 return this 3692 3693 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3694 args = self._parse_csv(self._parse_table) 3695 return exp.JoinHint(this=func_name.upper(), expressions=args) 3696 3697 def _parse_substring(self) -> exp.Expression: 3698 # Postgres supports the form: substring(string [from int] [for int]) 3699 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3700 3701 args = self._parse_csv(self._parse_bitwise) 3702 3703 if self._match(TokenType.FROM): 3704 args.append(self._parse_bitwise()) 3705 if self._match(TokenType.FOR): 3706 args.append(self._parse_bitwise()) 3707 3708 this = exp.Substring.from_arg_list(args) 3709 self.validate_expression(this, args) 3710 3711 return this 3712 3713 def _parse_trim(self) -> exp.Expression: 3714 # https://www.w3resource.com/sql/character-functions/trim.php 3715 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3716 3717 position = None 3718 collation = None 3719 3720 if self._match_set(self.TRIM_TYPES): 3721 position = self._prev.text.upper() 3722 3723 expression = self._parse_bitwise() 3724 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3725 this = self._parse_bitwise() 3726 else: 3727 this = expression 3728 expression = None 3729 3730 if self._match(TokenType.COLLATE): 3731 collation = self._parse_bitwise() 3732 3733 return self.expression( 3734 exp.Trim, 3735 this=this, 3736 position=position, 3737 expression=expression, 3738 collation=collation, 3739 ) 3740 3741 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3742 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3743 3744 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3745 return 
self._parse_window(self._parse_id_var(), alias=True) 3746 3747 def _parse_respect_or_ignore_nulls( 3748 self, this: t.Optional[exp.Expression] 3749 ) -> t.Optional[exp.Expression]: 3750 if self._match(TokenType.IGNORE_NULLS): 3751 return self.expression(exp.IgnoreNulls, this=this) 3752 if self._match(TokenType.RESPECT_NULLS): 3753 return self.expression(exp.RespectNulls, this=this) 3754 return this 3755 3756 def _parse_window( 3757 self, this: t.Optional[exp.Expression], alias: bool = False 3758 ) -> t.Optional[exp.Expression]: 3759 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3760 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3761 self._match_r_paren() 3762 3763 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3764 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3765 if self._match(TokenType.WITHIN_GROUP): 3766 order = self._parse_wrapped(self._parse_order) 3767 this = self.expression(exp.WithinGroup, this=this, expression=order) 3768 3769 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3770 # Some dialects choose to implement and some do not. 3771 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3772 3773 # There is some code above in _parse_lambda that handles 3774 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3775 3776 # The below changes handle 3777 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3778 3779 # Oracle allows both formats 3780 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3781 # and Snowflake chose to do the same for familiarity 3782 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3783 this = self._parse_respect_or_ignore_nulls(this) 3784 3785 # bigquery select from window x AS (partition by ...) 
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER a named window, e.g. "OVER w" — no parenthesized spec follows.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame endpoint (e.g. UNBOUNDED PRECEDING) as a value/side dict."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (or parenthesized alias list) for `this`.

        When `explicit` is True, an alias is only parsed if the AS keyword is present.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token into an exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and treat its text as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR (or any allowed token) into an exp.Var, falling back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a variable or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a "*" token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. "?"); rewinds if its parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * EXCEPT (...) column list, wrapped or bare."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * REPLACE (...) expression list, wrapped or bare."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, attaching separator comments."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a matched "(" and ")"."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or fall back to an expression with possible set operations."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT used inside DDL (e.g. CREATE TABLE AS), without a subquery alias."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START TRANSACTION with optional kind and mode lists."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens, e.g. "ISOLATION LEVEL READ COMMITTED".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK (the keyword itself was already consumed by the caller)."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] col def [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... DROP [COLUMN], defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse ALTER TABLE ... DROP PARTITION (...)[, PARTITION (...)]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD CONSTRAINT/FOREIGN KEY/PRIMARY KEY."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD clause of ALTER TABLE as constraints or, failing that, columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse the ALTER [COLUMN] clause of ALTER TABLE."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if
self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4143 return self.expression(exp.AlterColumn, this=column, drop=True) 4144 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4145 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4146 4147 self._match_text_seq("SET", "DATA") 4148 return self.expression( 4149 exp.AlterColumn, 4150 this=column, 4151 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4152 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4153 using=self._match(TokenType.USING) and self._parse_conjunction(), 4154 ) 4155 4156 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4157 index = self._index - 1 4158 4159 partition_exists = self._parse_exists() 4160 if self._match(TokenType.PARTITION, advance=False): 4161 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4162 4163 self._retreat(index) 4164 return self._parse_csv(self._parse_drop_column) 4165 4166 def _parse_alter_table_rename(self) -> exp.Expression: 4167 self._match_text_seq("TO") 4168 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4169 4170 def _parse_alter(self) -> t.Optional[exp.Expression]: 4171 start = self._prev 4172 4173 if not self._match(TokenType.TABLE): 4174 return self._parse_as_command(start) 4175 4176 exists = self._parse_exists() 4177 this = self._parse_table(schema=True) 4178 4179 if self._next: 4180 self._advance() 4181 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4182 4183 if parser: 4184 actions = ensure_list(parser(self)) 4185 4186 if not self._curr: 4187 return self.expression( 4188 exp.AlterTable, 4189 this=this, 4190 exists=exists, 4191 actions=actions, 4192 ) 4193 return self._parse_as_command(start) 4194 4195 def _parse_merge(self) -> exp.Expression: 4196 self._match(TokenType.INTO) 4197 target = self._parse_table() 4198 4199 self._match(TokenType.USING) 4200 using = 
self._parse_table() 4201 4202 self._match(TokenType.ON) 4203 on = self._parse_conjunction() 4204 4205 whens = [] 4206 while self._match(TokenType.WHEN): 4207 matched = not self._match(TokenType.NOT) 4208 self._match_text_seq("MATCHED") 4209 source = ( 4210 False 4211 if self._match_text_seq("BY", "TARGET") 4212 else self._match_text_seq("BY", "SOURCE") 4213 ) 4214 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4215 4216 self._match(TokenType.THEN) 4217 4218 if self._match(TokenType.INSERT): 4219 _this = self._parse_star() 4220 if _this: 4221 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4222 else: 4223 then = self.expression( 4224 exp.Insert, 4225 this=self._parse_value(), 4226 expression=self._match(TokenType.VALUES) and self._parse_value(), 4227 ) 4228 elif self._match(TokenType.UPDATE): 4229 expressions = self._parse_star() 4230 if expressions: 4231 then = self.expression(exp.Update, expressions=expressions) 4232 else: 4233 then = self.expression( 4234 exp.Update, 4235 expressions=self._match(TokenType.SET) 4236 and self._parse_csv(self._parse_equality), 4237 ) 4238 elif self._match(TokenType.DELETE): 4239 then = self.expression(exp.Var, this=self._prev.text) 4240 else: 4241 then = None 4242 4243 whens.append( 4244 self.expression( 4245 exp.When, 4246 matched=matched, 4247 source=source, 4248 condition=condition, 4249 then=then, 4250 ) 4251 ) 4252 4253 return self.expression( 4254 exp.Merge, 4255 this=target, 4256 using=using, 4257 on=on, 4258 expressions=whens, 4259 ) 4260 4261 def _parse_show(self) -> t.Optional[exp.Expression]: 4262 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4263 if parser: 4264 return parser(self) 4265 self._advance() 4266 return self.expression(exp.Show, this=self._prev.text.upper()) 4267 4268 def _parse_set_item_assignment( 4269 self, kind: t.Optional[str] = None 4270 ) -> t.Optional[exp.Expression]: 4271 index = self._index 4272 4273 if kind in 
{"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4274 return self._parse_set_transaction(global_=kind == "GLOBAL") 4275 4276 left = self._parse_primary() or self._parse_id_var() 4277 4278 if not self._match_texts(("=", "TO")): 4279 self._retreat(index) 4280 return None 4281 4282 right = self._parse_statement() or self._parse_id_var() 4283 this = self.expression( 4284 exp.EQ, 4285 this=left, 4286 expression=right, 4287 ) 4288 4289 return self.expression( 4290 exp.SetItem, 4291 this=this, 4292 kind=kind, 4293 ) 4294 4295 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4296 self._match_text_seq("TRANSACTION") 4297 characteristics = self._parse_csv( 4298 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4299 ) 4300 return self.expression( 4301 exp.SetItem, 4302 expressions=characteristics, 4303 kind="TRANSACTION", 4304 **{"global": global_}, # type: ignore 4305 ) 4306 4307 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4308 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4309 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4310 4311 def _parse_set(self) -> exp.Expression: 4312 index = self._index 4313 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4314 4315 if self._curr: 4316 self._retreat(index) 4317 return self._parse_as_command(self._prev) 4318 4319 return set_ 4320 4321 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4322 for option in options: 4323 if self._match_text_seq(*option.split(" ")): 4324 return exp.Var(this=option) 4325 return None 4326 4327 def _parse_as_command(self, start: Token) -> exp.Command: 4328 while self._curr: 4329 self._advance() 4330 text = self._find_sql(start, self._prev) 4331 size = len(start.text) 4332 return exp.Command(this=text[:size], expression=text[size:]) 4333 4334 def _find_parser( 4335 self, parsers: t.Dict[str, 
t.Callable], trie: t.Dict 4336 ) -> t.Optional[t.Callable]: 4337 if not self._curr: 4338 return None 4339 4340 index = self._index 4341 this = [] 4342 while True: 4343 # The current token might be multiple words 4344 curr = self._curr.text.upper() 4345 key = curr.split(" ") 4346 this.append(curr) 4347 self._advance() 4348 result, trie = in_trie(trie, key) 4349 if result == 0: 4350 break 4351 if result == 2: 4352 subparser = parsers[" ".join(this)] 4353 return subparser 4354 self._retreat(index) 4355 return None 4356 4357 def _match(self, token_type, advance=True): 4358 if not self._curr: 4359 return None 4360 4361 if self._curr.token_type == token_type: 4362 if advance: 4363 self._advance() 4364 return True 4365 4366 return None 4367 4368 def _match_set(self, types, advance=True): 4369 if not self._curr: 4370 return None 4371 4372 if self._curr.token_type in types: 4373 if advance: 4374 self._advance() 4375 return True 4376 4377 return None 4378 4379 def _match_pair(self, token_type_a, token_type_b, advance=True): 4380 if not self._curr or not self._next: 4381 return None 4382 4383 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4384 if advance: 4385 self._advance(2) 4386 return True 4387 4388 return None 4389 4390 def _match_l_paren(self, expression=None): 4391 if not self._match(TokenType.L_PAREN): 4392 self.raise_error("Expecting (") 4393 if expression and self._prev_comments: 4394 expression.comments = self._prev_comments 4395 4396 def _match_r_paren(self, expression=None): 4397 if not self._match(TokenType.R_PAREN): 4398 self.raise_error("Expecting )") 4399 if expression and self._prev_comments: 4400 expression.comments = self._prev_comments 4401 4402 def _match_texts(self, texts, advance=True): 4403 if self._curr and self._curr.text.upper() in texts: 4404 if advance: 4405 self._advance() 4406 return True 4407 return False 4408 4409 def _match_text_seq(self, *texts, advance=True): 4410 index = self._index 4411 for text in 
texts: 4412 if self._curr and self._curr.text.upper() == text: 4413 self._advance() 4414 else: 4415 self._retreat(index) 4416 return False 4417 4418 if not advance: 4419 self._retreat(index) 4420 4421 return True 4422 4423 def _replace_columns_with_dots(self, this): 4424 if isinstance(this, exp.Dot): 4425 exp.replace_children(this, self._replace_columns_with_dots) 4426 elif isinstance(this, exp.Column): 4427 exp.replace_children(this, self._replace_columns_with_dots) 4428 table = this.args.get("table") 4429 this = ( 4430 self.expression(exp.Dot, this=table, expression=this.this) 4431 if table 4432 else self.expression(exp.Var, this=this.name) 4433 ) 4434 elif isinstance(this, exp.Identifier): 4435 this = self.expression(exp.Var, this=this.name) 4436 return this 4437 4438 def _replace_lambda(self, node, lambda_variables): 4439 for column in node.find_all(exp.Column): 4440 if column.parts[0].name in lambda_variables: 4441 dot_or_id = column.to_dot() if column.table else column.this 4442 parent = column.parent 4443 4444 while isinstance(parent, exp.Dot): 4445 if not isinstance(parent.parent, exp.Dot): 4446 parent.replace(dot_or_id) 4447 break 4448 parent = parent.parent 4449 else: 4450 if column is node: 4451 node = dot_or_id 4452 else: 4453 column.replace(dot_or_id) 4454 return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap from a flat [k1, v1, k2, v2, ...] argument list.

    A single star argument produces a StarMap instead. Keys occupy the even
    positions and values the odd positions of `args`.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    map_keys = []
    map_values = []

    for index in range(0, len(args), 2):
        map_keys.append(args[index])
        map_values.append(args[index + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=map_keys),
        values=exp.Array(expressions=map_values),
    )
62class Parser(metaclass=_Parser): 63 """ 64 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 65 a parsed syntax tree. 66 67 Args: 68 error_level: the desired error level. 69 Default: ErrorLevel.RAISE 70 error_message_context: determines the amount of context to capture from a 71 query string when displaying the error message (in number of characters). 72 Default: 50. 73 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 74 Default: 0 75 alias_post_tablesample: If the table alias comes after tablesample. 76 Default: False 77 max_errors: Maximum number of error messages to include in a raised ParseError. 78 This is only relevant if error_level is ErrorLevel.RAISE. 79 Default: 3 80 null_ordering: Indicates the default null ordering method to use if not explicitly set. 81 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 82 Default: "nulls_are_small" 83 """ 84 85 FUNCTIONS: t.Dict[str, t.Callable] = { 86 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 87 "DATE_TO_DATE_STR": lambda args: exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 92 "IFNULL": exp.Coalesce.from_arg_list, 93 "LIKE": parse_like, 94 "TIME_TO_TIME_STR": lambda args: exp.Cast( 95 this=seq_get(args, 0), 96 to=exp.DataType(this=exp.DataType.Type.TEXT), 97 ), 98 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 99 this=exp.Cast( 100 this=seq_get(args, 0), 101 to=exp.DataType(this=exp.DataType.Type.TEXT), 102 ), 103 start=exp.Literal.number(1), 104 length=exp.Literal.number(10), 105 ), 106 "VAR_MAP": parse_var_map, 107 } 108 109 NO_PAREN_FUNCTIONS = { 110 TokenType.CURRENT_DATE: exp.CurrentDate, 111 TokenType.CURRENT_DATETIME: exp.CurrentDate, 112 TokenType.CURRENT_TIME: exp.CurrentTime, 113 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 114 
TokenType.CURRENT_USER: exp.CurrentUser, 115 } 116 117 JOIN_HINTS: t.Set[str] = set() 118 119 NESTED_TYPE_TOKENS = { 120 TokenType.ARRAY, 121 TokenType.MAP, 122 TokenType.STRUCT, 123 TokenType.NULLABLE, 124 } 125 126 TYPE_TOKENS = { 127 TokenType.BIT, 128 TokenType.BOOLEAN, 129 TokenType.TINYINT, 130 TokenType.SMALLINT, 131 TokenType.INT, 132 TokenType.BIGINT, 133 TokenType.FLOAT, 134 TokenType.DOUBLE, 135 TokenType.CHAR, 136 TokenType.NCHAR, 137 TokenType.VARCHAR, 138 TokenType.NVARCHAR, 139 TokenType.TEXT, 140 TokenType.MEDIUMTEXT, 141 TokenType.LONGTEXT, 142 TokenType.MEDIUMBLOB, 143 TokenType.LONGBLOB, 144 TokenType.BINARY, 145 TokenType.VARBINARY, 146 TokenType.JSON, 147 TokenType.JSONB, 148 TokenType.INTERVAL, 149 TokenType.TIME, 150 TokenType.TIMESTAMP, 151 TokenType.TIMESTAMPTZ, 152 TokenType.TIMESTAMPLTZ, 153 TokenType.DATETIME, 154 TokenType.DATE, 155 TokenType.DECIMAL, 156 TokenType.BIGDECIMAL, 157 TokenType.UUID, 158 TokenType.GEOGRAPHY, 159 TokenType.GEOMETRY, 160 TokenType.HLLSKETCH, 161 TokenType.HSTORE, 162 TokenType.PSEUDO_TYPE, 163 TokenType.SUPER, 164 TokenType.SERIAL, 165 TokenType.SMALLSERIAL, 166 TokenType.BIGSERIAL, 167 TokenType.XML, 168 TokenType.UNIQUEIDENTIFIER, 169 TokenType.MONEY, 170 TokenType.SMALLMONEY, 171 TokenType.ROWVERSION, 172 TokenType.IMAGE, 173 TokenType.VARIANT, 174 TokenType.OBJECT, 175 TokenType.INET, 176 *NESTED_TYPE_TOKENS, 177 } 178 179 SUBQUERY_PREDICATES = { 180 TokenType.ANY: exp.Any, 181 TokenType.ALL: exp.All, 182 TokenType.EXISTS: exp.Exists, 183 TokenType.SOME: exp.Any, 184 } 185 186 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 187 188 DB_CREATABLES = { 189 TokenType.DATABASE, 190 TokenType.SCHEMA, 191 TokenType.TABLE, 192 TokenType.VIEW, 193 } 194 195 CREATABLES = { 196 TokenType.COLUMN, 197 TokenType.FUNCTION, 198 TokenType.INDEX, 199 TokenType.PROCEDURE, 200 *DB_CREATABLES, 201 } 202 203 ID_VAR_TOKENS = { 204 TokenType.VAR, 205 TokenType.ANTI, 206 TokenType.APPLY, 207 
TokenType.AUTO_INCREMENT, 208 TokenType.BEGIN, 209 TokenType.BOTH, 210 TokenType.BUCKET, 211 TokenType.CACHE, 212 TokenType.CASCADE, 213 TokenType.COLLATE, 214 TokenType.COMMAND, 215 TokenType.COMMENT, 216 TokenType.COMMIT, 217 TokenType.COMPOUND, 218 TokenType.CONSTRAINT, 219 TokenType.DEFAULT, 220 TokenType.DELETE, 221 TokenType.DESCRIBE, 222 TokenType.DIV, 223 TokenType.END, 224 TokenType.EXECUTE, 225 TokenType.ESCAPE, 226 TokenType.FALSE, 227 TokenType.FIRST, 228 TokenType.FILTER, 229 TokenType.FOLLOWING, 230 TokenType.FORMAT, 231 TokenType.FULL, 232 TokenType.IF, 233 TokenType.IS, 234 TokenType.ISNULL, 235 TokenType.INTERVAL, 236 TokenType.KEEP, 237 TokenType.LAZY, 238 TokenType.LEADING, 239 TokenType.LEFT, 240 TokenType.LOCAL, 241 TokenType.MATERIALIZED, 242 TokenType.MERGE, 243 TokenType.NATURAL, 244 TokenType.NEXT, 245 TokenType.OFFSET, 246 TokenType.ONLY, 247 TokenType.OPTIONS, 248 TokenType.ORDINALITY, 249 TokenType.OVERWRITE, 250 TokenType.PARTITION, 251 TokenType.PERCENT, 252 TokenType.PIVOT, 253 TokenType.PRAGMA, 254 TokenType.PRECEDING, 255 TokenType.RANGE, 256 TokenType.REFERENCES, 257 TokenType.RIGHT, 258 TokenType.ROW, 259 TokenType.ROWS, 260 TokenType.SEED, 261 TokenType.SEMI, 262 TokenType.SET, 263 TokenType.SHOW, 264 TokenType.SORTKEY, 265 TokenType.TEMPORARY, 266 TokenType.TOP, 267 TokenType.TRAILING, 268 TokenType.TRUE, 269 TokenType.UNBOUNDED, 270 TokenType.UNIQUE, 271 TokenType.UNLOGGED, 272 TokenType.UNPIVOT, 273 TokenType.VOLATILE, 274 TokenType.WINDOW, 275 *CREATABLES, 276 *SUBQUERY_PREDICATES, 277 *TYPE_TOKENS, 278 *NO_PAREN_FUNCTIONS, 279 } 280 281 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 282 283 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 284 TokenType.APPLY, 285 TokenType.FULL, 286 TokenType.LEFT, 287 TokenType.NATURAL, 288 TokenType.OFFSET, 289 TokenType.RIGHT, 290 TokenType.WINDOW, 291 } 292 293 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 294 295 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 296 
297 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 298 299 FUNC_TOKENS = { 300 TokenType.COMMAND, 301 TokenType.CURRENT_DATE, 302 TokenType.CURRENT_DATETIME, 303 TokenType.CURRENT_TIMESTAMP, 304 TokenType.CURRENT_TIME, 305 TokenType.CURRENT_USER, 306 TokenType.FILTER, 307 TokenType.FIRST, 308 TokenType.FORMAT, 309 TokenType.GLOB, 310 TokenType.IDENTIFIER, 311 TokenType.INDEX, 312 TokenType.ISNULL, 313 TokenType.ILIKE, 314 TokenType.LIKE, 315 TokenType.MERGE, 316 TokenType.OFFSET, 317 TokenType.PRIMARY_KEY, 318 TokenType.REPLACE, 319 TokenType.ROW, 320 TokenType.UNNEST, 321 TokenType.VAR, 322 TokenType.LEFT, 323 TokenType.RIGHT, 324 TokenType.DATE, 325 TokenType.DATETIME, 326 TokenType.TABLE, 327 TokenType.TIMESTAMP, 328 TokenType.TIMESTAMPTZ, 329 TokenType.WINDOW, 330 *TYPE_TOKENS, 331 *SUBQUERY_PREDICATES, 332 } 333 334 CONJUNCTION = { 335 TokenType.AND: exp.And, 336 TokenType.OR: exp.Or, 337 } 338 339 EQUALITY = { 340 TokenType.EQ: exp.EQ, 341 TokenType.NEQ: exp.NEQ, 342 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 343 } 344 345 COMPARISON = { 346 TokenType.GT: exp.GT, 347 TokenType.GTE: exp.GTE, 348 TokenType.LT: exp.LT, 349 TokenType.LTE: exp.LTE, 350 } 351 352 BITWISE = { 353 TokenType.AMP: exp.BitwiseAnd, 354 TokenType.CARET: exp.BitwiseXor, 355 TokenType.PIPE: exp.BitwiseOr, 356 TokenType.DPIPE: exp.DPipe, 357 } 358 359 TERM = { 360 TokenType.DASH: exp.Sub, 361 TokenType.PLUS: exp.Add, 362 TokenType.MOD: exp.Mod, 363 TokenType.COLLATE: exp.Collate, 364 } 365 366 FACTOR = { 367 TokenType.DIV: exp.IntDiv, 368 TokenType.LR_ARROW: exp.Distance, 369 TokenType.SLASH: exp.Div, 370 TokenType.STAR: exp.Mul, 371 } 372 373 TIMESTAMPS = { 374 TokenType.TIME, 375 TokenType.TIMESTAMP, 376 TokenType.TIMESTAMPTZ, 377 TokenType.TIMESTAMPLTZ, 378 } 379 380 SET_OPERATIONS = { 381 TokenType.UNION, 382 TokenType.INTERSECT, 383 TokenType.EXCEPT, 384 } 385 386 JOIN_SIDES = { 387 TokenType.LEFT, 388 TokenType.RIGHT, 389 TokenType.FULL, 390 } 391 392 JOIN_KINDS = { 
393 TokenType.INNER, 394 TokenType.OUTER, 395 TokenType.CROSS, 396 TokenType.SEMI, 397 TokenType.ANTI, 398 } 399 400 LAMBDAS = { 401 TokenType.ARROW: lambda self, expressions: self.expression( 402 exp.Lambda, 403 this=self._replace_lambda( 404 self._parse_conjunction(), 405 {node.name for node in expressions}, 406 ), 407 expressions=expressions, 408 ), 409 TokenType.FARROW: lambda self, expressions: self.expression( 410 exp.Kwarg, 411 this=exp.Var(this=expressions[0].name), 412 expression=self._parse_conjunction(), 413 ), 414 } 415 416 COLUMN_OPERATORS = { 417 TokenType.DOT: None, 418 TokenType.DCOLON: lambda self, this, to: self.expression( 419 exp.Cast if self.STRICT_CAST else exp.TryCast, 420 this=this, 421 to=to, 422 ), 423 TokenType.ARROW: lambda self, this, path: self.expression( 424 exp.JSONExtract, 425 this=this, 426 expression=path, 427 ), 428 TokenType.DARROW: lambda self, this, path: self.expression( 429 exp.JSONExtractScalar, 430 this=this, 431 expression=path, 432 ), 433 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 434 exp.JSONBExtract, 435 this=this, 436 expression=path, 437 ), 438 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 439 exp.JSONBExtractScalar, 440 this=this, 441 expression=path, 442 ), 443 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 444 exp.JSONBContains, 445 this=this, 446 expression=key, 447 ), 448 } 449 450 EXPRESSION_PARSERS = { 451 exp.Column: lambda self: self._parse_column(), 452 exp.DataType: lambda self: self._parse_types(), 453 exp.From: lambda self: self._parse_from(), 454 exp.Group: lambda self: self._parse_group(), 455 exp.Identifier: lambda self: self._parse_id_var(), 456 exp.Lateral: lambda self: self._parse_lateral(), 457 exp.Join: lambda self: self._parse_join(), 458 exp.Order: lambda self: self._parse_order(), 459 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 460 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 
461 exp.Lambda: lambda self: self._parse_lambda(), 462 exp.Limit: lambda self: self._parse_limit(), 463 exp.Offset: lambda self: self._parse_offset(), 464 exp.TableAlias: lambda self: self._parse_table_alias(), 465 exp.Table: lambda self: self._parse_table(), 466 exp.Condition: lambda self: self._parse_conjunction(), 467 exp.Expression: lambda self: self._parse_statement(), 468 exp.Properties: lambda self: self._parse_properties(), 469 exp.Where: lambda self: self._parse_where(), 470 exp.Ordered: lambda self: self._parse_ordered(), 471 exp.Having: lambda self: self._parse_having(), 472 exp.With: lambda self: self._parse_with(), 473 exp.Window: lambda self: self._parse_named_window(), 474 exp.Qualify: lambda self: self._parse_qualify(), 475 exp.Returning: lambda self: self._parse_returning(), 476 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 477 } 478 479 STATEMENT_PARSERS = { 480 TokenType.ALTER: lambda self: self._parse_alter(), 481 TokenType.BEGIN: lambda self: self._parse_transaction(), 482 TokenType.CACHE: lambda self: self._parse_cache(), 483 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 484 TokenType.COMMENT: lambda self: self._parse_comment(), 485 TokenType.CREATE: lambda self: self._parse_create(), 486 TokenType.DELETE: lambda self: self._parse_delete(), 487 TokenType.DESC: lambda self: self._parse_describe(), 488 TokenType.DESCRIBE: lambda self: self._parse_describe(), 489 TokenType.DROP: lambda self: self._parse_drop(), 490 TokenType.END: lambda self: self._parse_commit_or_rollback(), 491 TokenType.INSERT: lambda self: self._parse_insert(), 492 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 493 TokenType.MERGE: lambda self: self._parse_merge(), 494 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 495 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 496 TokenType.SET: lambda self: self._parse_set(), 497 TokenType.UNCACHE: lambda self: 
self._parse_uncache(), 498 TokenType.UPDATE: lambda self: self._parse_update(), 499 TokenType.USE: lambda self: self.expression( 500 exp.Use, 501 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 502 and exp.Var(this=self._prev.text), 503 this=self._parse_table(schema=False), 504 ), 505 } 506 507 UNARY_PARSERS = { 508 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 509 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 510 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 511 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 512 } 513 514 PRIMARY_PARSERS = { 515 TokenType.STRING: lambda self, token: self.expression( 516 exp.Literal, this=token.text, is_string=True 517 ), 518 TokenType.NUMBER: lambda self, token: self.expression( 519 exp.Literal, this=token.text, is_string=False 520 ), 521 TokenType.STAR: lambda self, _: self.expression( 522 exp.Star, 523 **{"except": self._parse_except(), "replace": self._parse_replace()}, 524 ), 525 TokenType.NULL: lambda self, _: self.expression(exp.Null), 526 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 527 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 528 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 529 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 530 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 531 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 532 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 533 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 534 } 535 536 PLACEHOLDER_PARSERS = { 537 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 538 TokenType.PARAMETER: lambda self: 
self._parse_parameter(), 539 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 540 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 541 else None, 542 } 543 544 RANGE_PARSERS = { 545 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 546 TokenType.GLOB: binary_range_parser(exp.Glob), 547 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 548 TokenType.IN: lambda self, this: self._parse_in(this), 549 TokenType.IS: lambda self, this: self._parse_is(this), 550 TokenType.LIKE: binary_range_parser(exp.Like), 551 TokenType.ILIKE: binary_range_parser(exp.ILike), 552 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 553 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 554 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 555 } 556 557 PROPERTY_PARSERS = { 558 "AFTER": lambda self: self._parse_afterjournal( 559 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 560 ), 561 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 562 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 563 "BEFORE": lambda self: self._parse_journal( 564 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 565 ), 566 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 567 "CHARACTER SET": lambda self: self._parse_character_set(), 568 "CHECKSUM": lambda self: self._parse_checksum(), 569 "CLUSTER BY": lambda self: self.expression( 570 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 571 ), 572 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 573 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 574 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 575 default=self._prev.text.upper() == "DEFAULT" 576 ), 577 "DEFINER": lambda self: self._parse_definer(), 578 "DETERMINISTIC": lambda self: self.expression( 579 
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        # NOTE: several entries below read self._prev — they rely on the keyword that was
        # just matched (e.g. "NO", "DUAL", "DEFAULT") having been consumed by the caller.
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint keyword -> parser, used when parsing constraints
    # inside CREATE/ALTER statements.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # "ON UPDATE <func>" — only produces a constraint when UPDATE follows ON.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs invoked without parentheses around their arguments.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose argument lists need special (non comma-separated) parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier name -> parser; consumed by _parse_query_modifiers.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scope keyword -> parser.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hooks: empty here, populated by subclasses.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can carry query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR ...
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether CAST raises on failure (TRY_CAST always passes False explicitly).
    STRICT_CAST = True

    # Argument order for CONVERT — presumably dialect-dependent; overridden by subclasses.
    CONVERT_TYPE_FIRST = False

    QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None
    PREFIXED_PIVOT_COLUMNS = False

    # LOG(b, x) vs LOG(x, b) argument order, and whether bare LOG means LN.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        self.error_level = error_level or \
            ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clears all mutable parsing state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if no parser is registered for a requested expression type.
            ParseError: if the tokens can't be parsed into any of the requested types.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we were attempting so callers can tell
                # which candidate failed.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into statement chunks on semicolons and runs
        # `parse_method` once per chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach (and consume) comments collected from the previously advanced token.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the raw SQL substring spanning the given start/end tokens."""
        return self.sql[start.start : end.end]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor and refreshes the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or fast-forwards) the cursor to an absolute index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Falls back to wrapping the rest of the statement in an opaque Command node."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement: a registered statement, a command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parses DROP [TEMPORARY|MATERIALIZED] <kind> ... (or falls back to a Command)."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS, returning a truthy value only when fully matched."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parses CREATE statements, gathering properties from all their possible locations."""
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION(...): consume TABLE but keep FUNCTION as the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the object name (POST_NAME location)."""
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property, trying registered parsers before generic key=value."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        """Parses STORED AS [INPUTFORMAT ... OUTPUTFORMAT ...] | <format>."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parses [= | AS] <value> into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Collects consecutive properties into a single exp.Properties node, if any."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            for p in ensure_list(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE right after CREATE [OR REPLACE] [UNIQUE] is a table property;
        # otherwise it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses the various WITH ... property forms (wrapped list, JOURNAL, DATA, ...)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> \
            exp.Expression:
        """Parses WITH JOURNAL TABLE = <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parses CHECKSUM = ON | OFF [DEFAULT]."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> \
            exp.Expression:
        """Parses [DEFAULT | MIN | MAX] DATABLOCKSIZE [= <size> [units]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parses LOCKING <kind> [<object>] [FOR|IN] <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        """Parses the [AND [NO] STATISTICS] tail of WITH [NO] DATA."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> \
            exp.Expression:
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option>]..."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parses RETURNS <type> | RETURNS TABLE [<schema>]."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parses INSERT [OVERWRITE] [LOCAL DIRECTORY | [OR <alt>] INTO TABLE] ..."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parses ON CONFLICT ... DO ... or ON DUPLICATE KEY ... clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parses [ROW FORMAT] SERDE <string> | DELIMITED [<options>...]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parses LOAD DATA [LOCAL] INPATH <path> [OVERWRITE] INTO TABLE ..."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS (<k> = <v>)] [AS <query>]."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse an optional PARTITION(<expr>, ...) clause."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse a single VALUES row, with or without surrounding parentheses."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, a parenthesized/nested query, or a VALUES clause.

        Also attaches a leading WITH (CTE) clause to the parsed statement when present.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            # e.g. BigQuery SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )
            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        """Parse one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] <alias> [(<col>, ...)] table alias."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty column list means the paren belongs to something else - back out.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap a parsed query in a Subquery node, consuming pivots and an optional alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach joins, laterals and the trailing query modifier clauses to `this` in place."""
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            join = self._parse_join()
            if join:
                this.append("joins", join)

            lateral = None
            if not join:
                lateral = self._parse_lateral()
                if lateral:
                    this.append("laterals", lateral)

            # Comma joins extend the FROM clause (not valid when `this` is a bare table).
            comma = None if table else self._match(TokenType.COMMA)
            if comma:
                this.args["from"].append("expressions", self._parse_table())

            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse an optional /*+ ... */ optimizer hint comment."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None
self._match(TokenType.HINT): 1963 hints = self._parse_csv(self._parse_function) 1964 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1965 self.raise_error("Expected */ after HINT") 1966 return self.expression(exp.Hint, expressions=hints) 1967 1968 return None 1969 1970 def _parse_into(self) -> t.Optional[exp.Expression]: 1971 if not self._match(TokenType.INTO): 1972 return None 1973 1974 temp = self._match(TokenType.TEMPORARY) 1975 unlogged = self._match(TokenType.UNLOGGED) 1976 self._match(TokenType.TABLE) 1977 1978 return self.expression( 1979 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1980 ) 1981 1982 def _parse_from(self) -> t.Optional[exp.Expression]: 1983 if not self._match(TokenType.FROM): 1984 return None 1985 1986 return self.expression( 1987 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1988 ) 1989 1990 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1991 if not self._match(TokenType.MATCH_RECOGNIZE): 1992 return None 1993 1994 self._match_l_paren() 1995 1996 partition = self._parse_partition_by() 1997 order = self._parse_order() 1998 measures = ( 1999 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2000 ) 2001 2002 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2003 rows = exp.Var(this="ONE ROW PER MATCH") 2004 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2005 text = "ALL ROWS PER MATCH" 2006 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2007 text += f" SHOW EMPTY MATCHES" 2008 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2009 text += f" OMIT EMPTY MATCHES" 2010 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2011 text += f" WITH UNMATCHED ROWS" 2012 rows = exp.Var(this=text) 2013 else: 2014 rows = None 2015 2016 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2017 text = "AFTER MATCH SKIP" 2018 if self._match_text_seq("PAST", "LAST", "ROW"): 2019 text += f" PAST 
LAST ROW" 2020 elif self._match_text_seq("TO", "NEXT", "ROW"): 2021 text += f" TO NEXT ROW" 2022 elif self._match_text_seq("TO", "FIRST"): 2023 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2024 elif self._match_text_seq("TO", "LAST"): 2025 text += f" TO LAST {self._advance_any().text}" # type: ignore 2026 after = exp.Var(this=text) 2027 else: 2028 after = None 2029 2030 if self._match_text_seq("PATTERN"): 2031 self._match_l_paren() 2032 2033 if not self._curr: 2034 self.raise_error("Expecting )", self._curr) 2035 2036 paren = 1 2037 start = self._curr 2038 2039 while self._curr and paren > 0: 2040 if self._curr.token_type == TokenType.L_PAREN: 2041 paren += 1 2042 if self._curr.token_type == TokenType.R_PAREN: 2043 paren -= 1 2044 end = self._prev 2045 self._advance() 2046 if paren > 0: 2047 self.raise_error("Expecting )", self._curr) 2048 pattern = exp.Var(this=self._find_sql(start, end)) 2049 else: 2050 pattern = None 2051 2052 define = ( 2053 self._parse_csv( 2054 lambda: self.expression( 2055 exp.Alias, 2056 alias=self._parse_id_var(any_token=True), 2057 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2058 ) 2059 ) 2060 if self._match_text_seq("DEFINE") 2061 else None 2062 ) 2063 2064 self._match_r_paren() 2065 2066 return self.expression( 2067 exp.MatchRecognize, 2068 partition_by=partition, 2069 order=order, 2070 measures=measures, 2071 rows=rows, 2072 after=after, 2073 pattern=pattern, 2074 define=define, 2075 alias=self._parse_table_alias(), 2076 ) 2077 2078 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2079 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2080 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2081 2082 if outer_apply or cross_apply: 2083 this = self._parse_select(table=True) 2084 view = None 2085 outer = not cross_apply 2086 elif self._match(TokenType.LATERAL): 2087 this = self._parse_select(table=True) 2088 view = self._match(TokenType.VIEW) 2089 outer = 
self._match(TokenType.OUTER) 2090 else: 2091 return None 2092 2093 if not this: 2094 this = self._parse_function() or self._parse_id_var(any_token=False) 2095 while self._match(TokenType.DOT): 2096 this = exp.Dot( 2097 this=this, 2098 expression=self._parse_function() or self._parse_id_var(any_token=False), 2099 ) 2100 2101 table_alias: t.Optional[exp.Expression] 2102 2103 if view: 2104 table = self._parse_id_var(any_token=False) 2105 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2106 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2107 else: 2108 table_alias = self._parse_table_alias() 2109 2110 expression = self.expression( 2111 exp.Lateral, 2112 this=this, 2113 view=view, 2114 outer=outer, 2115 alias=table_alias, 2116 ) 2117 2118 return expression 2119 2120 def _parse_join_side_and_kind( 2121 self, 2122 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2123 return ( 2124 self._match(TokenType.NATURAL) and self._prev, 2125 self._match_set(self.JOIN_SIDES) and self._prev, 2126 self._match_set(self.JOIN_KINDS) and self._prev, 2127 ) 2128 2129 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2130 index = self._index 2131 natural, side, kind = self._parse_join_side_and_kind() 2132 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2133 join = self._match(TokenType.JOIN) 2134 2135 if not skip_join_token and not join: 2136 self._retreat(index) 2137 kind = None 2138 natural = None 2139 side = None 2140 2141 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2142 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2143 2144 if not skip_join_token and not join and not outer_apply and not cross_apply: 2145 return None 2146 2147 if outer_apply: 2148 side = Token(TokenType.LEFT, "LEFT") 2149 2150 kwargs: t.Dict[ 2151 str, t.Optional[exp.Expression] | bool | str | 
t.List[t.Optional[exp.Expression]] 2152 ] = {"this": self._parse_table()} 2153 2154 if natural: 2155 kwargs["natural"] = True 2156 if side: 2157 kwargs["side"] = side.text 2158 if kind: 2159 kwargs["kind"] = kind.text 2160 if hint: 2161 kwargs["hint"] = hint 2162 2163 if self._match(TokenType.ON): 2164 kwargs["on"] = self._parse_conjunction() 2165 elif self._match(TokenType.USING): 2166 kwargs["using"] = self._parse_wrapped_id_vars() 2167 2168 return self.expression(exp.Join, **kwargs) # type: ignore 2169 2170 def _parse_index(self) -> exp.Expression: 2171 index = self._parse_id_var() 2172 self._match(TokenType.ON) 2173 self._match(TokenType.TABLE) # hive 2174 2175 return self.expression( 2176 exp.Index, 2177 this=index, 2178 table=self.expression(exp.Table, this=self._parse_id_var()), 2179 columns=self._parse_expression(), 2180 ) 2181 2182 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2183 unique = self._match(TokenType.UNIQUE) 2184 primary = self._match_text_seq("PRIMARY") 2185 amp = self._match_text_seq("AMP") 2186 if not self._match(TokenType.INDEX): 2187 return None 2188 index = self._parse_id_var() 2189 columns = None 2190 if self._match(TokenType.L_PAREN, advance=False): 2191 columns = self._parse_wrapped_csv(self._parse_column) 2192 return self.expression( 2193 exp.Index, 2194 this=index, 2195 columns=columns, 2196 unique=unique, 2197 primary=primary, 2198 amp=amp, 2199 ) 2200 2201 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2202 catalog = None 2203 db = None 2204 2205 table = ( 2206 (not schema and self._parse_function()) 2207 or self._parse_id_var(any_token=False) 2208 or self._parse_string_as_identifier() 2209 ) 2210 2211 while self._match(TokenType.DOT): 2212 if catalog: 2213 # This allows nesting the table in arbitrarily many dot expressions if needed 2214 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2215 else: 2216 catalog = db 2217 db = table 2218 table = 
self._parse_id_var() 2219 2220 if not table: 2221 self.raise_error(f"Expected table name but got {self._curr}") 2222 2223 return self.expression( 2224 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2225 ) 2226 2227 def _parse_table( 2228 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2229 ) -> t.Optional[exp.Expression]: 2230 lateral = self._parse_lateral() 2231 2232 if lateral: 2233 return lateral 2234 2235 unnest = self._parse_unnest() 2236 2237 if unnest: 2238 return unnest 2239 2240 values = self._parse_derived_table_values() 2241 2242 if values: 2243 return values 2244 2245 subquery = self._parse_select(table=True) 2246 2247 if subquery: 2248 if not subquery.args.get("pivots"): 2249 subquery.set("pivots", self._parse_pivots()) 2250 return subquery 2251 2252 this = self._parse_table_parts(schema=schema) 2253 2254 if schema: 2255 return self._parse_schema(this=this) 2256 2257 if self.alias_post_tablesample: 2258 table_sample = self._parse_table_sample() 2259 2260 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2261 2262 if alias: 2263 this.set("alias", alias) 2264 2265 if not this.args.get("pivots"): 2266 this.set("pivots", self._parse_pivots()) 2267 2268 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2269 this.set( 2270 "hints", 2271 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2272 ) 2273 self._match_r_paren() 2274 2275 if not self.alias_post_tablesample: 2276 table_sample = self._parse_table_sample() 2277 2278 if table_sample: 2279 table_sample.set("this", this) 2280 this = table_sample 2281 2282 return this 2283 2284 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2285 if not self._match(TokenType.UNNEST): 2286 return None 2287 2288 expressions = self._parse_wrapped_csv(self._parse_type) 2289 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2290 alias = self._parse_table_alias() 2291 
2292 if alias and self.unnest_column_only: 2293 if alias.args.get("columns"): 2294 self.raise_error("Unexpected extra column alias in unnest.") 2295 alias.set("columns", [alias.this]) 2296 alias.set("this", None) 2297 2298 offset = None 2299 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2300 self._match(TokenType.ALIAS) 2301 offset = self._parse_id_var() or exp.Identifier(this="offset") 2302 2303 return self.expression( 2304 exp.Unnest, 2305 expressions=expressions, 2306 ordinality=ordinality, 2307 alias=alias, 2308 offset=offset, 2309 ) 2310 2311 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2312 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2313 if not is_derived and not self._match(TokenType.VALUES): 2314 return None 2315 2316 expressions = self._parse_csv(self._parse_value) 2317 2318 if is_derived: 2319 self._match_r_paren() 2320 2321 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2322 2323 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2324 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2325 as_modifier and self._match_text_seq("USING", "SAMPLE") 2326 ): 2327 return None 2328 2329 bucket_numerator = None 2330 bucket_denominator = None 2331 bucket_field = None 2332 percent = None 2333 rows = None 2334 size = None 2335 seed = None 2336 2337 kind = ( 2338 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2339 ) 2340 method = self._parse_var(tokens=(TokenType.ROW,)) 2341 2342 self._match(TokenType.L_PAREN) 2343 2344 num = self._parse_number() 2345 2346 if self._match(TokenType.BUCKET): 2347 bucket_numerator = self._parse_number() 2348 self._match(TokenType.OUT_OF) 2349 bucket_denominator = bucket_denominator = self._parse_number() 2350 self._match(TokenType.ON) 2351 bucket_field = self._parse_field() 2352 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2353 percent = num 
2354 elif self._match(TokenType.ROWS): 2355 rows = num 2356 else: 2357 size = num 2358 2359 self._match(TokenType.R_PAREN) 2360 2361 if self._match(TokenType.L_PAREN): 2362 method = self._parse_var() 2363 seed = self._match(TokenType.COMMA) and self._parse_number() 2364 self._match_r_paren() 2365 elif self._match_texts(("SEED", "REPEATABLE")): 2366 seed = self._parse_wrapped(self._parse_number) 2367 2368 return self.expression( 2369 exp.TableSample, 2370 method=method, 2371 bucket_numerator=bucket_numerator, 2372 bucket_denominator=bucket_denominator, 2373 bucket_field=bucket_field, 2374 percent=percent, 2375 rows=rows, 2376 size=size, 2377 seed=seed, 2378 kind=kind, 2379 ) 2380 2381 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2382 return list(iter(self._parse_pivot, None)) 2383 2384 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2385 index = self._index 2386 2387 if self._match(TokenType.PIVOT): 2388 unpivot = False 2389 elif self._match(TokenType.UNPIVOT): 2390 unpivot = True 2391 else: 2392 return None 2393 2394 expressions = [] 2395 field = None 2396 2397 if not self._match(TokenType.L_PAREN): 2398 self._retreat(index) 2399 return None 2400 2401 if unpivot: 2402 expressions = self._parse_csv(self._parse_column) 2403 else: 2404 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2405 2406 if not expressions: 2407 self.raise_error("Failed to parse PIVOT's aggregation list") 2408 2409 if not self._match(TokenType.FOR): 2410 self.raise_error("Expecting FOR") 2411 2412 value = self._parse_column() 2413 2414 if not self._match(TokenType.IN): 2415 self.raise_error("Expecting IN") 2416 2417 field = self._parse_in(value) 2418 2419 self._match_r_paren() 2420 2421 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2422 2423 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2424 pivot.set("alias", self._parse_table_alias()) 2425 2426 if not unpivot: 2427 
names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2428 2429 columns: t.List[exp.Expression] = [] 2430 for col in pivot.args["field"].expressions: 2431 for name in names: 2432 if self.PREFIXED_PIVOT_COLUMNS: 2433 name = f"{name}_{col.alias_or_name}" if name else col.alias_or_name 2434 else: 2435 name = f"{col.alias_or_name}_{name}" if name else col.alias_or_name 2436 2437 columns.append(exp.to_identifier(name, quoted=self.QUOTED_PIVOT_COLUMNS)) 2438 2439 pivot.set("columns", columns) 2440 2441 return pivot 2442 2443 def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]: 2444 return [agg.alias for agg in pivot_columns] 2445 2446 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2447 if not skip_where_token and not self._match(TokenType.WHERE): 2448 return None 2449 2450 return self.expression( 2451 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2452 ) 2453 2454 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2455 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2456 return None 2457 2458 elements = defaultdict(list) 2459 2460 while True: 2461 expressions = self._parse_csv(self._parse_conjunction) 2462 if expressions: 2463 elements["expressions"].extend(expressions) 2464 2465 grouping_sets = self._parse_grouping_sets() 2466 if grouping_sets: 2467 elements["grouping_sets"].extend(grouping_sets) 2468 2469 rollup = None 2470 cube = None 2471 2472 with_ = self._match(TokenType.WITH) 2473 if self._match(TokenType.ROLLUP): 2474 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2475 elements["rollup"].extend(ensure_list(rollup)) 2476 2477 if self._match(TokenType.CUBE): 2478 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2479 elements["cube"].extend(ensure_list(cube)) 2480 2481 if not (expressions or grouping_sets or rollup or cube): 2482 break 2483 2484 return 
self.expression(exp.Group, **elements) # type: ignore 2485 2486 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2487 if not self._match(TokenType.GROUPING_SETS): 2488 return None 2489 2490 return self._parse_wrapped_csv(self._parse_grouping_set) 2491 2492 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2493 if self._match(TokenType.L_PAREN): 2494 grouping_set = self._parse_csv(self._parse_column) 2495 self._match_r_paren() 2496 return self.expression(exp.Tuple, expressions=grouping_set) 2497 2498 return self._parse_column() 2499 2500 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2501 if not skip_having_token and not self._match(TokenType.HAVING): 2502 return None 2503 return self.expression(exp.Having, this=self._parse_conjunction()) 2504 2505 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2506 if not self._match(TokenType.QUALIFY): 2507 return None 2508 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2509 2510 def _parse_order( 2511 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2512 ) -> t.Optional[exp.Expression]: 2513 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2514 return this 2515 2516 return self.expression( 2517 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2518 ) 2519 2520 def _parse_sort( 2521 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2522 ) -> t.Optional[exp.Expression]: 2523 if not self._match(token_type): 2524 return None 2525 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2526 2527 def _parse_ordered(self) -> exp.Expression: 2528 this = self._parse_conjunction() 2529 self._match(TokenType.ASC) 2530 is_desc = self._match(TokenType.DESC) 2531 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2532 is_nulls_last = self._match(TokenType.NULLS_LAST) 2533 desc = is_desc or False 2534 asc = not desc 2535 
nulls_first = is_nulls_first or False 2536 explicitly_null_ordered = is_nulls_first or is_nulls_last 2537 if ( 2538 not explicitly_null_ordered 2539 and ( 2540 (asc and self.null_ordering == "nulls_are_small") 2541 or (desc and self.null_ordering != "nulls_are_small") 2542 ) 2543 and self.null_ordering != "nulls_are_last" 2544 ): 2545 nulls_first = True 2546 2547 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2548 2549 def _parse_limit( 2550 self, this: t.Optional[exp.Expression] = None, top: bool = False 2551 ) -> t.Optional[exp.Expression]: 2552 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2553 limit_paren = self._match(TokenType.L_PAREN) 2554 limit_exp = self.expression( 2555 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2556 ) 2557 2558 if limit_paren: 2559 self._match_r_paren() 2560 2561 return limit_exp 2562 2563 if self._match(TokenType.FETCH): 2564 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2565 direction = self._prev.text if direction else "FIRST" 2566 2567 count = self._parse_number() 2568 percent = self._match(TokenType.PERCENT) 2569 2570 self._match_set((TokenType.ROW, TokenType.ROWS)) 2571 2572 only = self._match(TokenType.ONLY) 2573 with_ties = self._match_text_seq("WITH", "TIES") 2574 2575 if only and with_ties: 2576 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2577 2578 return self.expression( 2579 exp.Fetch, 2580 direction=direction, 2581 count=count, 2582 percent=percent, 2583 with_ties=with_ties, 2584 ) 2585 2586 return this 2587 2588 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2589 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2590 return this 2591 2592 count = self._parse_number() 2593 self._match_set((TokenType.ROW, TokenType.ROWS)) 2594 return self.expression(exp.Offset, this=this, expression=count) 2595 2596 def _parse_lock(self) -> 
t.Optional[exp.Expression]: 2597 if self._match_text_seq("FOR", "UPDATE"): 2598 return self.expression(exp.Lock, update=True) 2599 if self._match_text_seq("FOR", "SHARE"): 2600 return self.expression(exp.Lock, update=False) 2601 2602 return None 2603 2604 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2605 if not self._match_set(self.SET_OPERATIONS): 2606 return this 2607 2608 token_type = self._prev.token_type 2609 2610 if token_type == TokenType.UNION: 2611 expression = exp.Union 2612 elif token_type == TokenType.EXCEPT: 2613 expression = exp.Except 2614 else: 2615 expression = exp.Intersect 2616 2617 return self.expression( 2618 expression, 2619 this=this, 2620 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2621 expression=self._parse_set_operations(self._parse_select(nested=True)), 2622 ) 2623 2624 def _parse_expression(self) -> t.Optional[exp.Expression]: 2625 return self._parse_alias(self._parse_conjunction()) 2626 2627 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2628 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2629 2630 def _parse_equality(self) -> t.Optional[exp.Expression]: 2631 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2632 2633 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2634 return self._parse_tokens(self._parse_range, self.COMPARISON) 2635 2636 def _parse_range(self) -> t.Optional[exp.Expression]: 2637 this = self._parse_bitwise() 2638 negate = self._match(TokenType.NOT) 2639 2640 if self._match_set(self.RANGE_PARSERS): 2641 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2642 if not expression: 2643 return this 2644 2645 this = expression 2646 elif self._match(TokenType.ISNULL): 2647 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2648 2649 # Postgres supports ISNULL and NOTNULL for conditions. 
2650 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2651 if self._match(TokenType.NOTNULL): 2652 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2653 this = self.expression(exp.Not, this=this) 2654 2655 if negate: 2656 this = self.expression(exp.Not, this=this) 2657 2658 if self._match(TokenType.IS): 2659 this = self._parse_is(this) 2660 2661 return this 2662 2663 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2664 index = self._index - 1 2665 negate = self._match(TokenType.NOT) 2666 if self._match(TokenType.DISTINCT_FROM): 2667 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2668 return self.expression(klass, this=this, expression=self._parse_expression()) 2669 2670 expression = self._parse_null() or self._parse_boolean() 2671 if not expression: 2672 self._retreat(index) 2673 return None 2674 2675 this = self.expression(exp.Is, this=this, expression=expression) 2676 return self.expression(exp.Not, this=this) if negate else this 2677 2678 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2679 unnest = self._parse_unnest() 2680 if unnest: 2681 this = self.expression(exp.In, this=this, unnest=unnest) 2682 elif self._match(TokenType.L_PAREN): 2683 expressions = self._parse_csv(self._parse_select_or_expression) 2684 2685 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2686 this = self.expression(exp.In, this=this, query=expressions[0]) 2687 else: 2688 this = self.expression(exp.In, this=this, expressions=expressions) 2689 2690 self._match_r_paren() 2691 else: 2692 this = self.expression(exp.In, this=this, field=self._parse_field()) 2693 2694 return this 2695 2696 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2697 low = self._parse_bitwise() 2698 self._match(TokenType.AND) 2699 high = self._parse_bitwise() 2700 return self.expression(exp.Between, this=this, low=low, high=high) 2701 2702 def _parse_escape(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Continuation of _parse_escape's split signature: wraps `this` in an
        # exp.Escape if an ESCAPE '<char>' clause follows (used after LIKE etc.).
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL <value> [<unit>] into an exp.Interval, or None if no INTERVAL token."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> tokenized as LT LT / GT GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01'), or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.args.get("expressions"):
                # A bare type name followed by a non-literal was likely a column, not a cast.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested or parameterized) data type, or rewind and return None.

        When check_func is True, a type name that could equally be a function call
        (e.g. "DATE(...)") is rejected unless followed by a string literal.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB database prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized args mean this might actually be a function call.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was a bracket/index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT [LOCAL] TIME ZONE variants onto canonical types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone qualifier rules out the function-call interpretation.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Only accept "TYPE(...)" as a type if it's followed by a string literal
            # (e.g. DATE '2020-01-01'); otherwise it's more likely a function call.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field, either "name: type" / "name type" or a bare type."""
        index = self._index
        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            # What looked like a field name was actually the type itself — reparse.
            self._retreat(index)
            return self._parse_types()
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> if present."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted and bracketed) column reference with column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::type" cast operator.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: what was parsed as column.table.db becomes table.db.catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, concatenated strings, or a parenthesized form."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # ".5" style decimal literal.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            self._match_r_paren()
            comments.extend(self._prev_comments)

            if this and comments:
                this.comments = comments

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary expression, a function call, or an identifier/variable.
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (or a no-paren function like CURRENT_DATE), else None."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(...) / ANY(...) etc. wrapping a subquery.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the second parameter
                # list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown (or forced-anonymous) function: keep the raw name and args.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter is an identifier with an optional type/constraints.
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: dotted name plus an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        # National character string literal, e.g. N'abc'.
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr), falling back to DISTINCT/select/expression parsing."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            # Treat "x = ..." argument heads as named parameters, not column equality.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints), unless a subquery follows."""
        index = self._index

        try:
            # If this parses as a SELECT, it's a derived table, not a schema — rewind either way.
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list after a column name."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Just a bare name — no column definition here.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (sequence options...)."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle",
                    False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        # Teradata INLINE LENGTH column constraint.
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of NOT {NULL | CASESPECIFIC}, or None."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones fall through to the schema-level set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword, if any."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a column constraint or with a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON <event> <action>, DEFERRABLE, ...)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] [options]; `match=False` skips the keyword check."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint or with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} after `this`: struct literal, array literal, or index access."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index access: shift indices to the dialect's array base offset.
            expressions = \
                apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Chained brackets, e.g. x[1][2].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a slice if a ":" follows (array slicing syntax)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true, false) or the IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast over exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style calls, including WITHIN GROUP ordering."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL can't be matched with "=", so use IS NULL explicitly.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of trailing args means the last one is the default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one [KEY] <key> [:|VALUE] <value> pair for JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL handling / unique-keys / RETURNING options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single-argument LOG: some dialects treat it as natural log.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL's MATCH (cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated argument form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into an exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first operand was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # WINDOW clause: a comma-separated list of named window definitions.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IGNORE NULLS / RESPECT NULLS if such a modifier follows."""
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        # NOTE: definition continues past this chunk.
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
3787 if alias: 3788 over = None 3789 self._match(TokenType.ALIAS) 3790 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3791 return this 3792 else: 3793 over = self._prev.text.upper() 3794 3795 if not self._match(TokenType.L_PAREN): 3796 return self.expression( 3797 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3798 ) 3799 3800 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3801 3802 first = self._match(TokenType.FIRST) 3803 if self._match_text_seq("LAST"): 3804 first = False 3805 3806 partition = self._parse_partition_by() 3807 order = self._parse_order() 3808 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3809 3810 if kind: 3811 self._match(TokenType.BETWEEN) 3812 start = self._parse_window_spec() 3813 self._match(TokenType.AND) 3814 end = self._parse_window_spec() 3815 3816 spec = self.expression( 3817 exp.WindowSpec, 3818 kind=kind, 3819 start=start["value"], 3820 start_side=start["side"], 3821 end=end["value"], 3822 end_side=end["side"], 3823 ) 3824 else: 3825 spec = None 3826 3827 self._match_r_paren() 3828 3829 return self.expression( 3830 exp.Window, 3831 this=this, 3832 partition_by=partition, 3833 order=order, 3834 spec=spec, 3835 alias=window_alias, 3836 over=over, 3837 first=first, 3838 ) 3839 3840 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3841 self._match(TokenType.BETWEEN) 3842 3843 return { 3844 "value": ( 3845 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3846 ) 3847 or self._parse_bitwise(), 3848 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3849 } 3850 3851 def _parse_alias( 3852 self, this: t.Optional[exp.Expression], explicit: bool = False 3853 ) -> t.Optional[exp.Expression]: 3854 any_token = self._match(TokenType.ALIAS) 3855 3856 if explicit and not any_token: 3857 return this 3858 3859 if self._match(TokenType.L_PAREN): 3860 aliases 
= self.expression( 3861 exp.Aliases, 3862 this=this, 3863 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3864 ) 3865 self._match_r_paren(aliases) 3866 return aliases 3867 3868 alias = self._parse_id_var(any_token) 3869 3870 if alias: 3871 return self.expression(exp.Alias, this=this, alias=alias) 3872 3873 return this 3874 3875 def _parse_id_var( 3876 self, 3877 any_token: bool = True, 3878 tokens: t.Optional[t.Collection[TokenType]] = None, 3879 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3880 ) -> t.Optional[exp.Expression]: 3881 identifier = self._parse_identifier() 3882 3883 if identifier: 3884 return identifier 3885 3886 prefix = "" 3887 3888 if prefix_tokens: 3889 while self._match_set(prefix_tokens): 3890 prefix += self._prev.text 3891 3892 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3893 quoted = self._prev.token_type == TokenType.STRING 3894 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3895 3896 return None 3897 3898 def _parse_string(self) -> t.Optional[exp.Expression]: 3899 if self._match(TokenType.STRING): 3900 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3901 return self._parse_placeholder() 3902 3903 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 3904 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 3905 3906 def _parse_number(self) -> t.Optional[exp.Expression]: 3907 if self._match(TokenType.NUMBER): 3908 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3909 return self._parse_placeholder() 3910 3911 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3912 if self._match(TokenType.IDENTIFIER): 3913 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3914 return self._parse_placeholder() 3915 3916 def _parse_var( 3917 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 3918 ) -> 
t.Optional[exp.Expression]: 3919 if ( 3920 (any_token and self._advance_any()) 3921 or self._match(TokenType.VAR) 3922 or (self._match_set(tokens) if tokens else False) 3923 ): 3924 return self.expression(exp.Var, this=self._prev.text) 3925 return self._parse_placeholder() 3926 3927 def _advance_any(self) -> t.Optional[Token]: 3928 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3929 self._advance() 3930 return self._prev 3931 return None 3932 3933 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3934 return self._parse_var() or self._parse_string() 3935 3936 def _parse_null(self) -> t.Optional[exp.Expression]: 3937 if self._match(TokenType.NULL): 3938 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3939 return None 3940 3941 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3942 if self._match(TokenType.TRUE): 3943 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3944 if self._match(TokenType.FALSE): 3945 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3946 return None 3947 3948 def _parse_star(self) -> t.Optional[exp.Expression]: 3949 if self._match(TokenType.STAR): 3950 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3951 return None 3952 3953 def _parse_parameter(self) -> exp.Expression: 3954 wrapped = self._match(TokenType.L_BRACE) 3955 this = self._parse_var() or self._parse_primary() 3956 self._match(TokenType.R_BRACE) 3957 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3958 3959 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3960 if self._match_set(self.PLACEHOLDER_PARSERS): 3961 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3962 if placeholder: 3963 return placeholder 3964 self._advance(-1) 3965 return None 3966 3967 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3968 if not self._match(TokenType.EXCEPT): 3969 return None 3970 if self._match(TokenType.L_PAREN, advance=False): 
3971 return self._parse_wrapped_csv(self._parse_column) 3972 return self._parse_csv(self._parse_column) 3973 3974 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3975 if not self._match(TokenType.REPLACE): 3976 return None 3977 if self._match(TokenType.L_PAREN, advance=False): 3978 return self._parse_wrapped_csv(self._parse_expression) 3979 return self._parse_csv(self._parse_expression) 3980 3981 def _parse_csv( 3982 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3983 ) -> t.List[t.Optional[exp.Expression]]: 3984 parse_result = parse_method() 3985 items = [parse_result] if parse_result is not None else [] 3986 3987 while self._match(sep): 3988 if parse_result and self._prev_comments: 3989 parse_result.comments = self._prev_comments 3990 3991 parse_result = parse_method() 3992 if parse_result is not None: 3993 items.append(parse_result) 3994 3995 return items 3996 3997 def _parse_tokens( 3998 self, parse_method: t.Callable, expressions: t.Dict 3999 ) -> t.Optional[exp.Expression]: 4000 this = parse_method() 4001 4002 while self._match_set(expressions): 4003 this = self.expression( 4004 expressions[self._prev.token_type], 4005 this=this, 4006 comments=self._prev_comments, 4007 expression=parse_method(), 4008 ) 4009 4010 return this 4011 4012 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 4013 return self._parse_wrapped_csv(self._parse_id_var) 4014 4015 def _parse_wrapped_csv( 4016 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4017 ) -> t.List[t.Optional[exp.Expression]]: 4018 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 4019 4020 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 4021 self._match_l_paren() 4022 parse_result = parse_method() 4023 self._match_r_paren() 4024 return parse_result 4025 4026 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 4027 return self._parse_select() or 
self._parse_set_operations(self._parse_expression()) 4028 4029 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4030 return self._parse_set_operations( 4031 self._parse_select(nested=True, parse_subquery_alias=False) 4032 ) 4033 4034 def _parse_transaction(self) -> exp.Expression: 4035 this = None 4036 if self._match_texts(self.TRANSACTION_KIND): 4037 this = self._prev.text 4038 4039 self._match_texts({"TRANSACTION", "WORK"}) 4040 4041 modes = [] 4042 while True: 4043 mode = [] 4044 while self._match(TokenType.VAR): 4045 mode.append(self._prev.text) 4046 4047 if mode: 4048 modes.append(" ".join(mode)) 4049 if not self._match(TokenType.COMMA): 4050 break 4051 4052 return self.expression(exp.Transaction, this=this, modes=modes) 4053 4054 def _parse_commit_or_rollback(self) -> exp.Expression: 4055 chain = None 4056 savepoint = None 4057 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4058 4059 self._match_texts({"TRANSACTION", "WORK"}) 4060 4061 if self._match_text_seq("TO"): 4062 self._match_text_seq("SAVEPOINT") 4063 savepoint = self._parse_id_var() 4064 4065 if self._match(TokenType.AND): 4066 chain = not self._match_text_seq("NO") 4067 self._match_text_seq("CHAIN") 4068 4069 if is_rollback: 4070 return self.expression(exp.Rollback, savepoint=savepoint) 4071 return self.expression(exp.Commit, chain=chain) 4072 4073 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4074 if not self._match_text_seq("ADD"): 4075 return None 4076 4077 self._match(TokenType.COLUMN) 4078 exists_column = self._parse_exists(not_=True) 4079 expression = self._parse_column_def(self._parse_field(any_token=True)) 4080 4081 if expression: 4082 expression.set("exists", exists_column) 4083 4084 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4085 if self._match_texts(("FIRST", "AFTER")): 4086 position = self._prev.text 4087 column_position = self.expression( 4088 exp.ColumnPosition, this=self._parse_column(), 
position=position 4089 ) 4090 expression.set("position", column_position) 4091 4092 return expression 4093 4094 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4095 drop = self._match(TokenType.DROP) and self._parse_drop() 4096 if drop and not isinstance(drop, exp.Command): 4097 drop.set("kind", drop.args.get("kind", "COLUMN")) 4098 return drop 4099 4100 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4101 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4102 return self.expression( 4103 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4104 ) 4105 4106 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4107 this = None 4108 kind = self._prev.token_type 4109 4110 if kind == TokenType.CONSTRAINT: 4111 this = self._parse_id_var() 4112 4113 if self._match_text_seq("CHECK"): 4114 expression = self._parse_wrapped(self._parse_conjunction) 4115 enforced = self._match_text_seq("ENFORCED") 4116 4117 return self.expression( 4118 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4119 ) 4120 4121 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4122 expression = self._parse_foreign_key() 4123 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4124 expression = self._parse_primary_key() 4125 else: 4126 expression = None 4127 4128 return self.expression(exp.AddConstraint, this=this, expression=expression) 4129 4130 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4131 index = self._index - 1 4132 4133 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4134 return self._parse_csv(self._parse_add_constraint) 4135 4136 self._retreat(index) 4137 return self._parse_csv(self._parse_add_column) 4138 4139 def _parse_alter_table_alter(self) -> exp.Expression: 4140 self._match(TokenType.COLUMN) 4141 column = self._parse_field(any_token=True) 4142 4143 if 
self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4144 return self.expression(exp.AlterColumn, this=column, drop=True) 4145 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4146 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4147 4148 self._match_text_seq("SET", "DATA") 4149 return self.expression( 4150 exp.AlterColumn, 4151 this=column, 4152 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4153 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4154 using=self._match(TokenType.USING) and self._parse_conjunction(), 4155 ) 4156 4157 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4158 index = self._index - 1 4159 4160 partition_exists = self._parse_exists() 4161 if self._match(TokenType.PARTITION, advance=False): 4162 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4163 4164 self._retreat(index) 4165 return self._parse_csv(self._parse_drop_column) 4166 4167 def _parse_alter_table_rename(self) -> exp.Expression: 4168 self._match_text_seq("TO") 4169 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4170 4171 def _parse_alter(self) -> t.Optional[exp.Expression]: 4172 start = self._prev 4173 4174 if not self._match(TokenType.TABLE): 4175 return self._parse_as_command(start) 4176 4177 exists = self._parse_exists() 4178 this = self._parse_table(schema=True) 4179 4180 if self._next: 4181 self._advance() 4182 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4183 4184 if parser: 4185 actions = ensure_list(parser(self)) 4186 4187 if not self._curr: 4188 return self.expression( 4189 exp.AlterTable, 4190 this=this, 4191 exists=exists, 4192 actions=actions, 4193 ) 4194 return self._parse_as_command(start) 4195 4196 def _parse_merge(self) -> exp.Expression: 4197 self._match(TokenType.INTO) 4198 target = self._parse_table() 4199 4200 self._match(TokenType.USING) 4201 using = 
self._parse_table() 4202 4203 self._match(TokenType.ON) 4204 on = self._parse_conjunction() 4205 4206 whens = [] 4207 while self._match(TokenType.WHEN): 4208 matched = not self._match(TokenType.NOT) 4209 self._match_text_seq("MATCHED") 4210 source = ( 4211 False 4212 if self._match_text_seq("BY", "TARGET") 4213 else self._match_text_seq("BY", "SOURCE") 4214 ) 4215 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4216 4217 self._match(TokenType.THEN) 4218 4219 if self._match(TokenType.INSERT): 4220 _this = self._parse_star() 4221 if _this: 4222 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4223 else: 4224 then = self.expression( 4225 exp.Insert, 4226 this=self._parse_value(), 4227 expression=self._match(TokenType.VALUES) and self._parse_value(), 4228 ) 4229 elif self._match(TokenType.UPDATE): 4230 expressions = self._parse_star() 4231 if expressions: 4232 then = self.expression(exp.Update, expressions=expressions) 4233 else: 4234 then = self.expression( 4235 exp.Update, 4236 expressions=self._match(TokenType.SET) 4237 and self._parse_csv(self._parse_equality), 4238 ) 4239 elif self._match(TokenType.DELETE): 4240 then = self.expression(exp.Var, this=self._prev.text) 4241 else: 4242 then = None 4243 4244 whens.append( 4245 self.expression( 4246 exp.When, 4247 matched=matched, 4248 source=source, 4249 condition=condition, 4250 then=then, 4251 ) 4252 ) 4253 4254 return self.expression( 4255 exp.Merge, 4256 this=target, 4257 using=using, 4258 on=on, 4259 expressions=whens, 4260 ) 4261 4262 def _parse_show(self) -> t.Optional[exp.Expression]: 4263 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4264 if parser: 4265 return parser(self) 4266 self._advance() 4267 return self.expression(exp.Show, this=self._prev.text.upper()) 4268 4269 def _parse_set_item_assignment( 4270 self, kind: t.Optional[str] = None 4271 ) -> t.Optional[exp.Expression]: 4272 index = self._index 4273 4274 if kind in 
{"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4275 return self._parse_set_transaction(global_=kind == "GLOBAL") 4276 4277 left = self._parse_primary() or self._parse_id_var() 4278 4279 if not self._match_texts(("=", "TO")): 4280 self._retreat(index) 4281 return None 4282 4283 right = self._parse_statement() or self._parse_id_var() 4284 this = self.expression( 4285 exp.EQ, 4286 this=left, 4287 expression=right, 4288 ) 4289 4290 return self.expression( 4291 exp.SetItem, 4292 this=this, 4293 kind=kind, 4294 ) 4295 4296 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4297 self._match_text_seq("TRANSACTION") 4298 characteristics = self._parse_csv( 4299 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4300 ) 4301 return self.expression( 4302 exp.SetItem, 4303 expressions=characteristics, 4304 kind="TRANSACTION", 4305 **{"global": global_}, # type: ignore 4306 ) 4307 4308 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4309 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4310 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4311 4312 def _parse_set(self) -> exp.Expression: 4313 index = self._index 4314 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4315 4316 if self._curr: 4317 self._retreat(index) 4318 return self._parse_as_command(self._prev) 4319 4320 return set_ 4321 4322 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4323 for option in options: 4324 if self._match_text_seq(*option.split(" ")): 4325 return exp.Var(this=option) 4326 return None 4327 4328 def _parse_as_command(self, start: Token) -> exp.Command: 4329 while self._curr: 4330 self._advance() 4331 text = self._find_sql(start, self._prev) 4332 size = len(start.text) 4333 return exp.Command(this=text[:size], expression=text[size:]) 4334 4335 def _find_parser( 4336 self, parsers: t.Dict[str, 
t.Callable], trie: t.Dict 4337 ) -> t.Optional[t.Callable]: 4338 if not self._curr: 4339 return None 4340 4341 index = self._index 4342 this = [] 4343 while True: 4344 # The current token might be multiple words 4345 curr = self._curr.text.upper() 4346 key = curr.split(" ") 4347 this.append(curr) 4348 self._advance() 4349 result, trie = in_trie(trie, key) 4350 if result == 0: 4351 break 4352 if result == 2: 4353 subparser = parsers[" ".join(this)] 4354 return subparser 4355 self._retreat(index) 4356 return None 4357 4358 def _match(self, token_type, advance=True): 4359 if not self._curr: 4360 return None 4361 4362 if self._curr.token_type == token_type: 4363 if advance: 4364 self._advance() 4365 return True 4366 4367 return None 4368 4369 def _match_set(self, types, advance=True): 4370 if not self._curr: 4371 return None 4372 4373 if self._curr.token_type in types: 4374 if advance: 4375 self._advance() 4376 return True 4377 4378 return None 4379 4380 def _match_pair(self, token_type_a, token_type_b, advance=True): 4381 if not self._curr or not self._next: 4382 return None 4383 4384 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4385 if advance: 4386 self._advance(2) 4387 return True 4388 4389 return None 4390 4391 def _match_l_paren(self, expression=None): 4392 if not self._match(TokenType.L_PAREN): 4393 self.raise_error("Expecting (") 4394 if expression and self._prev_comments: 4395 expression.comments = self._prev_comments 4396 4397 def _match_r_paren(self, expression=None): 4398 if not self._match(TokenType.R_PAREN): 4399 self.raise_error("Expecting )") 4400 if expression and self._prev_comments: 4401 expression.comments = self._prev_comments 4402 4403 def _match_texts(self, texts, advance=True): 4404 if self._curr and self._curr.text.upper() in texts: 4405 if advance: 4406 self._advance() 4407 return True 4408 return False 4409 4410 def _match_text_seq(self, *texts, advance=True): 4411 index = self._index 4412 for text in 
texts: 4413 if self._curr and self._curr.text.upper() == text: 4414 self._advance() 4415 else: 4416 self._retreat(index) 4417 return False 4418 4419 if not advance: 4420 self._retreat(index) 4421 4422 return True 4423 4424 def _replace_columns_with_dots(self, this): 4425 if isinstance(this, exp.Dot): 4426 exp.replace_children(this, self._replace_columns_with_dots) 4427 elif isinstance(this, exp.Column): 4428 exp.replace_children(this, self._replace_columns_with_dots) 4429 table = this.args.get("table") 4430 this = ( 4431 self.expression(exp.Dot, this=table, expression=this.this) 4432 if table 4433 else self.expression(exp.Var, this=this.name) 4434 ) 4435 elif isinstance(this, exp.Identifier): 4436 this = self.expression(exp.Var, this=this.name) 4437 return this 4438 4439 def _replace_lambda(self, node, lambda_variables): 4440 for column in node.find_all(exp.Column): 4441 if column.parts[0].name in lambda_variables: 4442 dot_or_id = column.to_dot() if column.table else column.this 4443 parent = column.parent 4444 4445 while isinstance(parent, exp.Dot): 4446 if not isinstance(parent.parent, exp.Dot): 4447 parent.replace(dot_or_id) 4448 break 4449 parent = parent.parent 4450 else: 4451 if column is node: 4452 node = dot_or_id 4453 else: 4454 column.replace(dot_or_id) 4455 return node
Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE (applied when no level is passed).
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store parser configuration and reset all transient parse state."""
        # None means "use the default", which is ErrorLevel.IMMEDIATE.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
823 def parse( 824 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 825 ) -> t.List[t.Optional[exp.Expression]]: 826 """ 827 Parses a list of tokens and returns a list of syntax trees, one tree 828 per parsed SQL statement. 829 830 Args: 831 raw_tokens: the list of tokens. 832 sql: the original SQL string, used to produce helpful debug messages. 833 834 Returns: 835 The list of syntax trees. 836 """ 837 return self._parse( 838 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 839 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if no parser is registered for a requested expression type.
            ParseError: if the tokens could not be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type that was attempted, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # Every candidate type failed: surface all collected errors, chained to the last.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
909 def check_errors(self) -> None: 910 """ 911 Logs or raises any found errors, depending on the chosen error level setting. 912 """ 913 if self.error_level == ErrorLevel.WARN: 914 for error in self.errors: 915 logger.error(str(error)) 916 elif self.error_level == ErrorLevel.RAISE and self.errors: 917 raise ParseError( 918 concat_messages(self.errors, self.max_errors), 919 errors=merge_errors(self.errors), 920 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token the error is anchored to; defaults to the current
                (else previous) token, falling back to an empty string token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        # Capture a window of SQL text around the offending span for the message.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined via ANSI escapes (\033[4m ... \033[0m).
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach (and consume) comments collected while matching previous tokens.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        # Explicitly passed comments take precedence.
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
973 def validate_expression( 974 self, expression: exp.Expression, args: t.Optional[t.List] = None 975 ) -> None: 976 """ 977 Validates an already instantiated expression, making sure that all its mandatory arguments 978 are set. 979 980 Args: 981 expression: the expression to validate. 982 args: an optional list of items that was used to instantiate the expression, if it's a Func. 983 """ 984 if self.error_level == ErrorLevel.IGNORE: 985 return 986 987 for error_message in expression.error_messages(args): 988 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.