sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21 22def parse_var_map(args: t.Sequence) -> exp.Expression: 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def parse_like(args): 35 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 36 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 37 38 39def binary_range_parser( 40 expr_type: t.Type[exp.Expression], 41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 42 return lambda self, this: self._parse_escape( 43 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 44 ) 45 46 47class _Parser(type): 48 def __new__(cls, clsname, bases, attrs): 49 klass = super().__new__(cls, clsname, bases, attrs) 50 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 51 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 52 53 return klass 54 55 56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 
67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.MAP, 113 TokenType.STRUCT, 114 TokenType.NULLABLE, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.SMALLINT, 122 TokenType.INT, 123 TokenType.BIGINT, 124 TokenType.FLOAT, 125 TokenType.DOUBLE, 126 TokenType.CHAR, 127 TokenType.NCHAR, 128 TokenType.VARCHAR, 129 TokenType.NVARCHAR, 130 
TokenType.TEXT, 131 TokenType.MEDIUMTEXT, 132 TokenType.LONGTEXT, 133 TokenType.MEDIUMBLOB, 134 TokenType.LONGBLOB, 135 TokenType.BINARY, 136 TokenType.VARBINARY, 137 TokenType.JSON, 138 TokenType.JSONB, 139 TokenType.INTERVAL, 140 TokenType.TIME, 141 TokenType.TIMESTAMP, 142 TokenType.TIMESTAMPTZ, 143 TokenType.TIMESTAMPLTZ, 144 TokenType.DATETIME, 145 TokenType.DATE, 146 TokenType.DECIMAL, 147 TokenType.UUID, 148 TokenType.GEOGRAPHY, 149 TokenType.GEOMETRY, 150 TokenType.HLLSKETCH, 151 TokenType.HSTORE, 152 TokenType.PSEUDO_TYPE, 153 TokenType.SUPER, 154 TokenType.SERIAL, 155 TokenType.SMALLSERIAL, 156 TokenType.BIGSERIAL, 157 TokenType.XML, 158 TokenType.UNIQUEIDENTIFIER, 159 TokenType.MONEY, 160 TokenType.SMALLMONEY, 161 TokenType.ROWVERSION, 162 TokenType.IMAGE, 163 TokenType.VARIANT, 164 TokenType.OBJECT, 165 TokenType.INET, 166 *NESTED_TYPE_TOKENS, 167 } 168 169 SUBQUERY_PREDICATES = { 170 TokenType.ANY: exp.Any, 171 TokenType.ALL: exp.All, 172 TokenType.EXISTS: exp.Exists, 173 TokenType.SOME: exp.Any, 174 } 175 176 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 177 178 DB_CREATABLES = { 179 TokenType.DATABASE, 180 TokenType.SCHEMA, 181 TokenType.TABLE, 182 TokenType.VIEW, 183 } 184 185 CREATABLES = { 186 TokenType.COLUMN, 187 TokenType.FUNCTION, 188 TokenType.INDEX, 189 TokenType.PROCEDURE, 190 *DB_CREATABLES, 191 } 192 193 ID_VAR_TOKENS = { 194 TokenType.VAR, 195 TokenType.ANTI, 196 TokenType.APPLY, 197 TokenType.AUTO_INCREMENT, 198 TokenType.BEGIN, 199 TokenType.BOTH, 200 TokenType.BUCKET, 201 TokenType.CACHE, 202 TokenType.CASCADE, 203 TokenType.COLLATE, 204 TokenType.COMMAND, 205 TokenType.COMMENT, 206 TokenType.COMMIT, 207 TokenType.COMPOUND, 208 TokenType.CONSTRAINT, 209 TokenType.DEFAULT, 210 TokenType.DELETE, 211 TokenType.DESCRIBE, 212 TokenType.DIV, 213 TokenType.END, 214 TokenType.EXECUTE, 215 TokenType.ESCAPE, 216 TokenType.FALSE, 217 TokenType.FIRST, 218 TokenType.FILTER, 219 TokenType.FOLLOWING, 220 TokenType.FORMAT, 
221 TokenType.FULL, 222 TokenType.IF, 223 TokenType.ISNULL, 224 TokenType.INTERVAL, 225 TokenType.LAZY, 226 TokenType.LEADING, 227 TokenType.LEFT, 228 TokenType.LOCAL, 229 TokenType.MATERIALIZED, 230 TokenType.MERGE, 231 TokenType.NATURAL, 232 TokenType.NEXT, 233 TokenType.OFFSET, 234 TokenType.ONLY, 235 TokenType.OPTIONS, 236 TokenType.ORDINALITY, 237 TokenType.PARTITION, 238 TokenType.PERCENT, 239 TokenType.PIVOT, 240 TokenType.PRAGMA, 241 TokenType.PRECEDING, 242 TokenType.RANGE, 243 TokenType.REFERENCES, 244 TokenType.RIGHT, 245 TokenType.ROW, 246 TokenType.ROWS, 247 TokenType.SEED, 248 TokenType.SEMI, 249 TokenType.SET, 250 TokenType.SHOW, 251 TokenType.SORTKEY, 252 TokenType.TEMPORARY, 253 TokenType.TOP, 254 TokenType.TRAILING, 255 TokenType.TRUE, 256 TokenType.UNBOUNDED, 257 TokenType.UNIQUE, 258 TokenType.UNLOGGED, 259 TokenType.UNPIVOT, 260 TokenType.VOLATILE, 261 TokenType.WINDOW, 262 *CREATABLES, 263 *SUBQUERY_PREDICATES, 264 *TYPE_TOKENS, 265 *NO_PAREN_FUNCTIONS, 266 } 267 268 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 269 TokenType.APPLY, 270 TokenType.FULL, 271 TokenType.LEFT, 272 TokenType.NATURAL, 273 TokenType.OFFSET, 274 TokenType.RIGHT, 275 TokenType.WINDOW, 276 } 277 278 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 279 280 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 281 282 FUNC_TOKENS = { 283 TokenType.COMMAND, 284 TokenType.CURRENT_DATE, 285 TokenType.CURRENT_DATETIME, 286 TokenType.CURRENT_TIMESTAMP, 287 TokenType.CURRENT_TIME, 288 TokenType.FILTER, 289 TokenType.FIRST, 290 TokenType.FORMAT, 291 TokenType.GLOB, 292 TokenType.IDENTIFIER, 293 TokenType.INDEX, 294 TokenType.ISNULL, 295 TokenType.ILIKE, 296 TokenType.LIKE, 297 TokenType.MERGE, 298 TokenType.OFFSET, 299 TokenType.PRIMARY_KEY, 300 TokenType.REPLACE, 301 TokenType.ROW, 302 TokenType.UNNEST, 303 TokenType.VAR, 304 TokenType.LEFT, 305 TokenType.RIGHT, 306 TokenType.DATE, 307 TokenType.DATETIME, 308 TokenType.TABLE, 309 TokenType.TIMESTAMP, 310 
TokenType.TIMESTAMPTZ, 311 TokenType.WINDOW, 312 *TYPE_TOKENS, 313 *SUBQUERY_PREDICATES, 314 } 315 316 CONJUNCTION = { 317 TokenType.AND: exp.And, 318 TokenType.OR: exp.Or, 319 } 320 321 EQUALITY = { 322 TokenType.EQ: exp.EQ, 323 TokenType.NEQ: exp.NEQ, 324 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 325 } 326 327 COMPARISON = { 328 TokenType.GT: exp.GT, 329 TokenType.GTE: exp.GTE, 330 TokenType.LT: exp.LT, 331 TokenType.LTE: exp.LTE, 332 } 333 334 BITWISE = { 335 TokenType.AMP: exp.BitwiseAnd, 336 TokenType.CARET: exp.BitwiseXor, 337 TokenType.PIPE: exp.BitwiseOr, 338 TokenType.DPIPE: exp.DPipe, 339 } 340 341 TERM = { 342 TokenType.DASH: exp.Sub, 343 TokenType.PLUS: exp.Add, 344 TokenType.MOD: exp.Mod, 345 TokenType.COLLATE: exp.Collate, 346 } 347 348 FACTOR = { 349 TokenType.DIV: exp.IntDiv, 350 TokenType.LR_ARROW: exp.Distance, 351 TokenType.SLASH: exp.Div, 352 TokenType.STAR: exp.Mul, 353 } 354 355 TIMESTAMPS = { 356 TokenType.TIME, 357 TokenType.TIMESTAMP, 358 TokenType.TIMESTAMPTZ, 359 TokenType.TIMESTAMPLTZ, 360 } 361 362 SET_OPERATIONS = { 363 TokenType.UNION, 364 TokenType.INTERSECT, 365 TokenType.EXCEPT, 366 } 367 368 JOIN_SIDES = { 369 TokenType.LEFT, 370 TokenType.RIGHT, 371 TokenType.FULL, 372 } 373 374 JOIN_KINDS = { 375 TokenType.INNER, 376 TokenType.OUTER, 377 TokenType.CROSS, 378 TokenType.SEMI, 379 TokenType.ANTI, 380 } 381 382 LAMBDAS = { 383 TokenType.ARROW: lambda self, expressions: self.expression( 384 exp.Lambda, 385 this=self._replace_lambda( 386 self._parse_conjunction(), 387 {node.name for node in expressions}, 388 ), 389 expressions=expressions, 390 ), 391 TokenType.FARROW: lambda self, expressions: self.expression( 392 exp.Kwarg, 393 this=exp.Var(this=expressions[0].name), 394 expression=self._parse_conjunction(), 395 ), 396 } 397 398 COLUMN_OPERATORS = { 399 TokenType.DOT: None, 400 TokenType.DCOLON: lambda self, this, to: self.expression( 401 exp.Cast, 402 this=this, 403 to=to, 404 ), 405 TokenType.ARROW: lambda self, this, path: 
self.expression( 406 exp.JSONExtract, 407 this=this, 408 expression=path, 409 ), 410 TokenType.DARROW: lambda self, this, path: self.expression( 411 exp.JSONExtractScalar, 412 this=this, 413 expression=path, 414 ), 415 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 416 exp.JSONBExtract, 417 this=this, 418 expression=path, 419 ), 420 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 421 exp.JSONBExtractScalar, 422 this=this, 423 expression=path, 424 ), 425 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 426 exp.JSONBContains, 427 this=this, 428 expression=key, 429 ), 430 } 431 432 EXPRESSION_PARSERS = { 433 exp.Column: lambda self: self._parse_column(), 434 exp.DataType: lambda self: self._parse_types(), 435 exp.From: lambda self: self._parse_from(), 436 exp.Group: lambda self: self._parse_group(), 437 exp.Identifier: lambda self: self._parse_id_var(), 438 exp.Lateral: lambda self: self._parse_lateral(), 439 exp.Join: lambda self: self._parse_join(), 440 exp.Order: lambda self: self._parse_order(), 441 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 442 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 443 exp.Lambda: lambda self: self._parse_lambda(), 444 exp.Limit: lambda self: self._parse_limit(), 445 exp.Offset: lambda self: self._parse_offset(), 446 exp.TableAlias: lambda self: self._parse_table_alias(), 447 exp.Table: lambda self: self._parse_table(), 448 exp.Condition: lambda self: self._parse_conjunction(), 449 exp.Expression: lambda self: self._parse_statement(), 450 exp.Properties: lambda self: self._parse_properties(), 451 exp.Where: lambda self: self._parse_where(), 452 exp.Ordered: lambda self: self._parse_ordered(), 453 exp.Having: lambda self: self._parse_having(), 454 exp.With: lambda self: self._parse_with(), 455 exp.Window: lambda self: self._parse_named_window(), 456 exp.Qualify: lambda self: self._parse_qualify(), 457 exp.Returning: lambda self: 
self._parse_returning(), 458 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 459 } 460 461 STATEMENT_PARSERS = { 462 TokenType.ALTER: lambda self: self._parse_alter(), 463 TokenType.BEGIN: lambda self: self._parse_transaction(), 464 TokenType.CACHE: lambda self: self._parse_cache(), 465 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 466 TokenType.COMMENT: lambda self: self._parse_comment(), 467 TokenType.CREATE: lambda self: self._parse_create(), 468 TokenType.DELETE: lambda self: self._parse_delete(), 469 TokenType.DESC: lambda self: self._parse_describe(), 470 TokenType.DESCRIBE: lambda self: self._parse_describe(), 471 TokenType.DROP: lambda self: self._parse_drop(), 472 TokenType.END: lambda self: self._parse_commit_or_rollback(), 473 TokenType.INSERT: lambda self: self._parse_insert(), 474 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 475 TokenType.MERGE: lambda self: self._parse_merge(), 476 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 477 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 478 TokenType.SET: lambda self: self._parse_set(), 479 TokenType.UNCACHE: lambda self: self._parse_uncache(), 480 TokenType.UPDATE: lambda self: self._parse_update(), 481 TokenType.USE: lambda self: self.expression( 482 exp.Use, 483 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 484 and exp.Var(this=self._prev.text), 485 this=self._parse_table(schema=False), 486 ), 487 } 488 489 UNARY_PARSERS = { 490 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 491 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 492 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 493 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 494 } 495 496 PRIMARY_PARSERS = { 497 TokenType.STRING: lambda self, token: self.expression( 498 exp.Literal, 
this=token.text, is_string=True 499 ), 500 TokenType.NUMBER: lambda self, token: self.expression( 501 exp.Literal, this=token.text, is_string=False 502 ), 503 TokenType.STAR: lambda self, _: self.expression( 504 exp.Star, 505 **{"except": self._parse_except(), "replace": self._parse_replace()}, 506 ), 507 TokenType.NULL: lambda self, _: self.expression(exp.Null), 508 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 509 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 510 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 511 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 512 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 513 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 514 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 515 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 516 } 517 518 PLACEHOLDER_PARSERS = { 519 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 520 TokenType.PARAMETER: lambda self: self._parse_parameter(), 521 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 522 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 523 else None, 524 } 525 526 RANGE_PARSERS = { 527 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 528 TokenType.GLOB: binary_range_parser(exp.Glob), 529 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 530 TokenType.IN: lambda self, this: self._parse_in(this), 531 TokenType.IS: lambda self, this: self._parse_is(this), 532 TokenType.LIKE: binary_range_parser(exp.Like), 533 TokenType.ILIKE: binary_range_parser(exp.ILike), 534 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 535 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 536 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 537 } 
538 539 PROPERTY_PARSERS = { 540 "AFTER": lambda self: self._parse_afterjournal( 541 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 542 ), 543 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 544 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 545 "BEFORE": lambda self: self._parse_journal( 546 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 547 ), 548 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 549 "CHARACTER SET": lambda self: self._parse_character_set(), 550 "CHECKSUM": lambda self: self._parse_checksum(), 551 "CLUSTER BY": lambda self: self.expression( 552 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 553 ), 554 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 555 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 556 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 557 default=self._prev.text.upper() == "DEFAULT" 558 ), 559 "DEFINER": lambda self: self._parse_definer(), 560 "DETERMINISTIC": lambda self: self.expression( 561 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 562 ), 563 "DISTKEY": lambda self: self._parse_distkey(), 564 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 565 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 566 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 567 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 568 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 569 "FREESPACE": lambda self: self._parse_freespace(), 570 "GLOBAL": lambda self: self._parse_temporary(global_=True), 571 "IMMUTABLE": lambda self: self.expression( 572 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 573 ), 574 "JOURNAL": lambda self: self._parse_journal( 575 
no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 576 ), 577 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 578 "LIKE": lambda self: self._parse_create_like(), 579 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 580 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 581 "LOCK": lambda self: self._parse_locking(), 582 "LOCKING": lambda self: self._parse_locking(), 583 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 584 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 585 "MAX": lambda self: self._parse_datablocksize(), 586 "MAXIMUM": lambda self: self._parse_datablocksize(), 587 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 588 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 589 ), 590 "MIN": lambda self: self._parse_datablocksize(), 591 "MINIMUM": lambda self: self._parse_datablocksize(), 592 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 593 "NO": lambda self: self._parse_noprimaryindex(), 594 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 595 "ON": lambda self: self._parse_oncommit(), 596 "PARTITION BY": lambda self: self._parse_partitioned_by(), 597 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 598 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 599 "RETURNS": lambda self: self._parse_returns(), 600 "ROW": lambda self: self._parse_row(), 601 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 602 "SORTKEY": lambda self: self._parse_sortkey(), 603 "STABLE": lambda self: self.expression( 604 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 605 ), 606 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 607 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 608 "TBLPROPERTIES": 
lambda self: self._parse_wrapped_csv(self._parse_property), 609 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 610 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 611 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 612 "VOLATILE": lambda self: self.expression( 613 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 614 ), 615 "WITH": lambda self: self._parse_with_property(), 616 } 617 618 CONSTRAINT_PARSERS = { 619 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 620 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 621 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 622 "CHARACTER SET": lambda self: self.expression( 623 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 624 ), 625 "CHECK": lambda self: self.expression( 626 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 627 ), 628 "COLLATE": lambda self: self.expression( 629 exp.CollateColumnConstraint, this=self._parse_var() 630 ), 631 "COMMENT": lambda self: self.expression( 632 exp.CommentColumnConstraint, this=self._parse_string() 633 ), 634 "COMPRESS": lambda self: self._parse_compress(), 635 "DEFAULT": lambda self: self.expression( 636 exp.DefaultColumnConstraint, this=self._parse_bitwise() 637 ), 638 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 639 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 640 "FORMAT": lambda self: self.expression( 641 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 642 ), 643 "GENERATED": lambda self: self._parse_generated_as_identity(), 644 "IDENTITY": lambda self: self._parse_auto_increment(), 645 "INLINE": lambda self: self._parse_inline(), 646 "LIKE": lambda self: self._parse_create_like(), 647 "NOT": lambda self: self._parse_not_constraint(), 648 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, 
allow_null=True), 649 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 650 "PRIMARY KEY": lambda self: self._parse_primary_key(), 651 "TITLE": lambda self: self.expression( 652 exp.TitleColumnConstraint, this=self._parse_var_or_string() 653 ), 654 "UNIQUE": lambda self: self._parse_unique(), 655 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 656 } 657 658 ALTER_PARSERS = { 659 "ADD": lambda self: self._parse_alter_table_add(), 660 "ALTER": lambda self: self._parse_alter_table_alter(), 661 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 662 "DROP": lambda self: self._parse_alter_table_drop(), 663 "RENAME": lambda self: self._parse_alter_table_rename(), 664 } 665 666 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 667 668 NO_PAREN_FUNCTION_PARSERS = { 669 TokenType.CASE: lambda self: self._parse_case(), 670 TokenType.IF: lambda self: self._parse_if(), 671 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 672 } 673 674 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 675 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 676 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 677 "EXTRACT": lambda self: self._parse_extract(), 678 "JSON_OBJECT": lambda self: self._parse_json_object(), 679 "POSITION": lambda self: self._parse_position(), 680 "STRING_AGG": lambda self: self._parse_string_agg(), 681 "SUBSTRING": lambda self: self._parse_substring(), 682 "TRIM": lambda self: self._parse_trim(), 683 "TRY_CAST": lambda self: self._parse_cast(False), 684 "TRY_CONVERT": lambda self: self._parse_convert(False), 685 } 686 687 QUERY_MODIFIER_PARSERS = { 688 "match": lambda self: self._parse_match_recognize(), 689 "where": lambda self: self._parse_where(), 690 "group": lambda self: self._parse_group(), 691 "having": lambda self: self._parse_having(), 692 "qualify": lambda self: 
self._parse_qualify(), 693 "windows": lambda self: self._parse_window_clause(), 694 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 695 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 696 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 697 "order": lambda self: self._parse_order(), 698 "limit": lambda self: self._parse_limit(), 699 "offset": lambda self: self._parse_offset(), 700 "lock": lambda self: self._parse_lock(), 701 "sample": lambda self: self._parse_table_sample(as_modifier=True), 702 } 703 704 SET_PARSERS = { 705 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 706 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 707 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 708 "TRANSACTION": lambda self: self._parse_set_transaction(), 709 } 710 711 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 712 713 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 714 715 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 716 717 TRANSACTION_CHARACTERISTICS = { 718 "ISOLATION LEVEL REPEATABLE READ", 719 "ISOLATION LEVEL READ COMMITTED", 720 "ISOLATION LEVEL READ UNCOMMITTED", 721 "ISOLATION LEVEL SERIALIZABLE", 722 "READ WRITE", 723 "READ ONLY", 724 } 725 726 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 727 728 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 729 730 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 731 732 STRICT_CAST = True 733 734 CONVERT_TYPE_FIRST = False 735 736 __slots__ = ( 737 "error_level", 738 "error_message_context", 739 "sql", 740 "errors", 741 "index_offset", 742 "unnest_column_only", 743 "alias_post_tablesample", 744 "max_errors", 745 "null_ordering", 746 "_tokens", 747 "_index", 748 "_curr", 749 "_next", 750 "_prev", 751 "_prev_comments", 752 "_show_trie", 753 "_set_trie", 754 ) 755 756 def __init__( 757 self, 758 
error_level: t.Optional[ErrorLevel] = None, 759 error_message_context: int = 100, 760 index_offset: int = 0, 761 unnest_column_only: bool = False, 762 alias_post_tablesample: bool = False, 763 max_errors: int = 3, 764 null_ordering: t.Optional[str] = None, 765 ): 766 self.error_level = error_level or ErrorLevel.IMMEDIATE 767 self.error_message_context = error_message_context 768 self.index_offset = index_offset 769 self.unnest_column_only = unnest_column_only 770 self.alias_post_tablesample = alias_post_tablesample 771 self.max_errors = max_errors 772 self.null_ordering = null_ordering 773 self.reset() 774 775 def reset(self): 776 self.sql = "" 777 self.errors = [] 778 self._tokens = [] 779 self._index = 0 780 self._curr = None 781 self._next = None 782 self._prev = None 783 self._prev_comments = None 784 785 def parse( 786 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 787 ) -> t.List[t.Optional[exp.Expression]]: 788 """ 789 Parses a list of tokens and returns a list of syntax trees, one tree 790 per parsed SQL statement. 791 792 Args: 793 raw_tokens: the list of tokens. 794 sql: the original SQL string, used to produce helpful debug messages. 795 796 Returns: 797 The list of syntax trees. 798 """ 799 return self._parse( 800 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 801 ) 802 803 def parse_into( 804 self, 805 expression_types: exp.IntoType, 806 raw_tokens: t.List[Token], 807 sql: t.Optional[str] = None, 808 ) -> t.List[t.Optional[exp.Expression]]: 809 """ 810 Parses a list of tokens into a given Expression type. If a collection of Expression 811 types is given instead, this method will try to parse the token list into each one 812 of them, stopping at the first for which the parsing succeeds. 813 814 Args: 815 expression_types: the expression type(s) to try and parse the token list into. 816 raw_tokens: the list of tokens. 817 sql: the original SQL string, used to produce helpful debug messages. 
818 819 Returns: 820 The target Expression. 821 """ 822 errors = [] 823 for expression_type in ensure_collection(expression_types): 824 parser = self.EXPRESSION_PARSERS.get(expression_type) 825 if not parser: 826 raise TypeError(f"No parser registered for {expression_type}") 827 try: 828 return self._parse(parser, raw_tokens, sql) 829 except ParseError as e: 830 e.errors[0]["into_expression"] = expression_type 831 errors.append(e) 832 raise ParseError( 833 f"Failed to parse into {expression_types}", 834 errors=merge_errors(errors), 835 ) from errors[-1] 836 837 def _parse( 838 self, 839 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 840 raw_tokens: t.List[Token], 841 sql: t.Optional[str] = None, 842 ) -> t.List[t.Optional[exp.Expression]]: 843 self.reset() 844 self.sql = sql or "" 845 total = len(raw_tokens) 846 chunks: t.List[t.List[Token]] = [[]] 847 848 for i, token in enumerate(raw_tokens): 849 if token.token_type == TokenType.SEMICOLON: 850 if i < total - 1: 851 chunks.append([]) 852 else: 853 chunks[-1].append(token) 854 855 expressions = [] 856 857 for tokens in chunks: 858 self._index = -1 859 self._tokens = tokens 860 self._advance() 861 862 expressions.append(parse_method(self)) 863 864 if self._index < len(self._tokens): 865 self.raise_error("Invalid expression / Unexpected token") 866 867 self.check_errors() 868 869 return expressions 870 871 def check_errors(self) -> None: 872 """ 873 Logs or raises any found errors, depending on the chosen error level setting. 
874 """ 875 if self.error_level == ErrorLevel.WARN: 876 for error in self.errors: 877 logger.error(str(error)) 878 elif self.error_level == ErrorLevel.RAISE and self.errors: 879 raise ParseError( 880 concat_messages(self.errors, self.max_errors), 881 errors=merge_errors(self.errors), 882 ) 883 884 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 885 """ 886 Appends an error in the list of recorded errors or raises it, depending on the chosen 887 error level setting. 888 """ 889 token = token or self._curr or self._prev or Token.string("") 890 start = self._find_token(token) 891 end = start + len(token.text) 892 start_context = self.sql[max(start - self.error_message_context, 0) : start] 893 highlight = self.sql[start:end] 894 end_context = self.sql[end : end + self.error_message_context] 895 896 error = ParseError.new( 897 f"{message}. Line {token.line}, Col: {token.col}.\n" 898 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 899 description=message, 900 line=token.line, 901 col=token.col, 902 start_context=start_context, 903 highlight=highlight, 904 end_context=end_context, 905 ) 906 907 if self.error_level == ErrorLevel.IMMEDIATE: 908 raise error 909 910 self.errors.append(error) 911 912 def expression( 913 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 914 ) -> exp.Expression: 915 """ 916 Creates a new, validated Expression. 917 918 Args: 919 exp_class: the expression class to instantiate. 920 comments: an optional list of comments to attach to the expression. 921 kwargs: the arguments to set for the expression along with their respective values. 922 923 Returns: 924 The target expression. 
925 """ 926 instance = exp_class(**kwargs) 927 if self._prev_comments: 928 instance.comments = self._prev_comments 929 self._prev_comments = None 930 if comments: 931 instance.comments = comments 932 self.validate_expression(instance) 933 return instance 934 935 def validate_expression( 936 self, expression: exp.Expression, args: t.Optional[t.List] = None 937 ) -> None: 938 """ 939 Validates an already instantiated expression, making sure that all its mandatory arguments 940 are set. 941 942 Args: 943 expression: the expression to validate. 944 args: an optional list of items that was used to instantiate the expression, if it's a Func. 945 """ 946 if self.error_level == ErrorLevel.IGNORE: 947 return 948 949 for error_message in expression.error_messages(args): 950 self.raise_error(error_message) 951 952 def _find_sql(self, start: Token, end: Token) -> str: 953 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 954 955 def _find_token(self, token: Token) -> int: 956 line = 1 957 col = 1 958 index = 0 959 960 while line < token.line or col < token.col: 961 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 962 line += 1 963 col = 1 964 else: 965 col += 1 966 index += 1 967 968 return index 969 970 def _advance(self, times: int = 1) -> None: 971 self._index += times 972 self._curr = seq_get(self._tokens, self._index) 973 self._next = seq_get(self._tokens, self._index + 1) 974 if self._index > 0: 975 self._prev = self._tokens[self._index - 1] 976 self._prev_comments = self._prev.comments 977 else: 978 self._prev = None 979 self._prev_comments = None 980 981 def _retreat(self, index: int) -> None: 982 if index != self._index: 983 self._advance(index - self._index) 984 985 def _parse_command(self) -> exp.Expression: 986 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 987 988 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 989 start = self._prev 990 exists = 
self._parse_exists() if allow_exists else None 991 992 self._match(TokenType.ON) 993 994 kind = self._match_set(self.CREATABLES) and self._prev 995 996 if not kind: 997 return self._parse_as_command(start) 998 999 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1000 this = self._parse_user_defined_function(kind=kind.token_type) 1001 elif kind.token_type == TokenType.TABLE: 1002 this = self._parse_table() 1003 elif kind.token_type == TokenType.COLUMN: 1004 this = self._parse_column() 1005 else: 1006 this = self._parse_id_var() 1007 1008 self._match(TokenType.IS) 1009 1010 return self.expression( 1011 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1012 ) 1013 1014 def _parse_statement(self) -> t.Optional[exp.Expression]: 1015 if self._curr is None: 1016 return None 1017 1018 if self._match_set(self.STATEMENT_PARSERS): 1019 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1020 1021 if self._match_set(Tokenizer.COMMANDS): 1022 return self._parse_command() 1023 1024 expression = self._parse_expression() 1025 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1026 1027 self._parse_query_modifiers(expression) 1028 return expression 1029 1030 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 1031 start = self._prev 1032 temporary = self._match(TokenType.TEMPORARY) 1033 materialized = self._match(TokenType.MATERIALIZED) 1034 kind = self._match_set(self.CREATABLES) and self._prev.text 1035 if not kind: 1036 if default_kind: 1037 kind = default_kind 1038 else: 1039 return self._parse_as_command(start) 1040 1041 return self.expression( 1042 exp.Drop, 1043 exists=self._parse_exists(), 1044 this=self._parse_table(schema=True), 1045 kind=kind, 1046 temporary=temporary, 1047 materialized=materialized, 1048 cascade=self._match(TokenType.CASCADE), 1049 ) 1050 1051 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1052 
return ( 1053 self._match(TokenType.IF) 1054 and (not not_ or self._match(TokenType.NOT)) 1055 and self._match(TokenType.EXISTS) 1056 ) 1057 1058 def _parse_create(self) -> t.Optional[exp.Expression]: 1059 start = self._prev 1060 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1061 TokenType.OR, TokenType.REPLACE 1062 ) 1063 unique = self._match(TokenType.UNIQUE) 1064 volatile = self._match(TokenType.VOLATILE) 1065 1066 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1067 self._match(TokenType.TABLE) 1068 1069 properties = None 1070 create_token = self._match_set(self.CREATABLES) and self._prev 1071 1072 if not create_token: 1073 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1074 create_token = self._match_set(self.CREATABLES) and self._prev 1075 1076 if not properties or not create_token: 1077 return self._parse_as_command(start) 1078 1079 exists = self._parse_exists(not_=True) 1080 this = None 1081 expression = None 1082 indexes = None 1083 no_schema_binding = None 1084 begin = None 1085 1086 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1087 this = self._parse_user_defined_function(kind=create_token.token_type) 1088 temp_properties = self._parse_properties() 1089 if properties and temp_properties: 1090 properties.expressions.extend(temp_properties.expressions) 1091 elif temp_properties: 1092 properties = temp_properties 1093 1094 self._match(TokenType.ALIAS) 1095 begin = self._match(TokenType.BEGIN) 1096 return_ = self._match_text_seq("RETURN") 1097 expression = self._parse_statement() 1098 1099 if return_: 1100 expression = self.expression(exp.Return, this=expression) 1101 elif create_token.token_type == TokenType.INDEX: 1102 this = self._parse_index() 1103 elif create_token.token_type in self.DB_CREATABLES: 1104 table_parts = self._parse_table_parts(schema=True) 1105 1106 # exp.Properties.Location.POST_NAME 1107 if self._match(TokenType.COMMA): 1108 
temp_properties = self._parse_properties(before=True) 1109 if properties and temp_properties: 1110 properties.expressions.extend(temp_properties.expressions) 1111 elif temp_properties: 1112 properties = temp_properties 1113 1114 this = self._parse_schema(this=table_parts) 1115 1116 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1117 temp_properties = self._parse_properties() 1118 if properties and temp_properties: 1119 properties.expressions.extend(temp_properties.expressions) 1120 elif temp_properties: 1121 properties = temp_properties 1122 1123 self._match(TokenType.ALIAS) 1124 1125 # exp.Properties.Location.POST_ALIAS 1126 if not ( 1127 self._match(TokenType.SELECT, advance=False) 1128 or self._match(TokenType.WITH, advance=False) 1129 or self._match(TokenType.L_PAREN, advance=False) 1130 ): 1131 temp_properties = self._parse_properties() 1132 if properties and temp_properties: 1133 properties.expressions.extend(temp_properties.expressions) 1134 elif temp_properties: 1135 properties = temp_properties 1136 1137 expression = self._parse_ddl_select() 1138 1139 if create_token.token_type == TokenType.TABLE: 1140 # exp.Properties.Location.POST_EXPRESSION 1141 temp_properties = self._parse_properties() 1142 if properties and temp_properties: 1143 properties.expressions.extend(temp_properties.expressions) 1144 elif temp_properties: 1145 properties = temp_properties 1146 1147 indexes = [] 1148 while True: 1149 index = self._parse_create_table_index() 1150 1151 # exp.Properties.Location.POST_INDEX 1152 if self._match(TokenType.PARTITION_BY, advance=False): 1153 temp_properties = self._parse_properties() 1154 if properties and temp_properties: 1155 properties.expressions.extend(temp_properties.expressions) 1156 elif temp_properties: 1157 properties = temp_properties 1158 1159 if not index: 1160 break 1161 else: 1162 indexes.append(index) 1163 elif create_token.token_type == TokenType.VIEW: 1164 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1165 
no_schema_binding = True 1166 1167 return self.expression( 1168 exp.Create, 1169 this=this, 1170 kind=create_token.text, 1171 replace=replace, 1172 unique=unique, 1173 volatile=volatile, 1174 expression=expression, 1175 exists=exists, 1176 properties=properties, 1177 indexes=indexes, 1178 no_schema_binding=no_schema_binding, 1179 begin=begin, 1180 ) 1181 1182 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1183 self._match(TokenType.COMMA) 1184 1185 # parsers look to _prev for no/dual/default, so need to consume first 1186 self._match_text_seq("NO") 1187 self._match_text_seq("DUAL") 1188 self._match_text_seq("DEFAULT") 1189 1190 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1191 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1192 1193 return None 1194 1195 def _parse_property(self) -> t.Optional[exp.Expression]: 1196 if self._match_texts(self.PROPERTY_PARSERS): 1197 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1198 1199 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1200 return self._parse_character_set(default=True) 1201 1202 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1203 return self._parse_sortkey(compound=True) 1204 1205 if self._match_text_seq("SQL", "SECURITY"): 1206 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1207 1208 assignment = self._match_pair( 1209 TokenType.VAR, TokenType.EQ, advance=False 1210 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1211 1212 if assignment: 1213 key = self._parse_var_or_string() 1214 self._match(TokenType.EQ) 1215 return self.expression(exp.Property, this=key, value=self._parse_column()) 1216 1217 return None 1218 1219 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1220 self._match(TokenType.EQ) 1221 self._match(TokenType.ALIAS) 1222 return self.expression( 1223 exp_class, 1224 this=self._parse_var_or_string() or 
self._parse_number() or self._parse_id_var(), 1225 ) 1226 1227 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1228 properties = [] 1229 1230 while True: 1231 if before: 1232 identified_property = self._parse_property_before() 1233 else: 1234 identified_property = self._parse_property() 1235 1236 if not identified_property: 1237 break 1238 for p in ensure_list(identified_property): 1239 properties.append(p) 1240 1241 if properties: 1242 return self.expression(exp.Properties, expressions=properties) 1243 1244 return None 1245 1246 def _parse_fallback(self, no=False) -> exp.Expression: 1247 self._match_text_seq("FALLBACK") 1248 return self.expression( 1249 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1250 ) 1251 1252 def _parse_with_property( 1253 self, 1254 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1255 self._match(TokenType.WITH) 1256 if self._match(TokenType.L_PAREN, advance=False): 1257 return self._parse_wrapped_csv(self._parse_property) 1258 1259 if self._match_text_seq("JOURNAL"): 1260 return self._parse_withjournaltable() 1261 1262 if self._match_text_seq("DATA"): 1263 return self._parse_withdata(no=False) 1264 elif self._match_text_seq("NO", "DATA"): 1265 return self._parse_withdata(no=True) 1266 1267 if not self._next: 1268 return None 1269 1270 return self._parse_withisolatedloading() 1271 1272 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1273 def _parse_definer(self) -> t.Optional[exp.Expression]: 1274 self._match(TokenType.EQ) 1275 1276 user = self._parse_id_var() 1277 self._match(TokenType.PARAMETER) 1278 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1279 1280 if not user or not host: 1281 return None 1282 1283 return exp.DefinerProperty(this=f"{user}@{host}") 1284 1285 def _parse_withjournaltable(self) -> exp.Expression: 1286 self._match(TokenType.TABLE) 1287 self._match(TokenType.EQ) 1288 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1289 1290 def _parse_log(self, no=False) -> exp.Expression: 1291 self._match_text_seq("LOG") 1292 return self.expression(exp.LogProperty, no=no) 1293 1294 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1295 before = self._match_text_seq("BEFORE") 1296 self._match_text_seq("JOURNAL") 1297 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1298 1299 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1300 self._match_text_seq("NOT") 1301 self._match_text_seq("LOCAL") 1302 self._match_text_seq("AFTER", "JOURNAL") 1303 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1304 1305 def _parse_checksum(self) -> exp.Expression: 1306 self._match_text_seq("CHECKSUM") 1307 self._match(TokenType.EQ) 1308 1309 on = None 1310 if self._match(TokenType.ON): 1311 on = True 1312 elif self._match_text_seq("OFF"): 1313 on = False 1314 default = self._match(TokenType.DEFAULT) 1315 1316 return self.expression( 1317 exp.ChecksumProperty, 1318 on=on, 1319 default=default, 1320 ) 1321 1322 def _parse_freespace(self) -> exp.Expression: 1323 self._match_text_seq("FREESPACE") 1324 self._match(TokenType.EQ) 1325 return self.expression( 1326 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1327 ) 1328 1329 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1330 self._match_text_seq("MERGEBLOCKRATIO") 1331 if self._match(TokenType.EQ): 1332 return self.expression( 1333 exp.MergeBlockRatioProperty, 1334 this=self._parse_number(), 1335 percent=self._match(TokenType.PERCENT), 1336 ) 1337 else: 1338 return self.expression( 1339 exp.MergeBlockRatioProperty, 1340 no=no, 1341 default=default, 1342 ) 1343 1344 def _parse_datablocksize(self, default=None) -> exp.Expression: 1345 if default: 1346 self._match_text_seq("DATABLOCKSIZE") 1347 return 
self.expression(exp.DataBlocksizeProperty, default=True) 1348 elif self._match_texts(("MIN", "MINIMUM")): 1349 self._match_text_seq("DATABLOCKSIZE") 1350 return self.expression(exp.DataBlocksizeProperty, min=True) 1351 elif self._match_texts(("MAX", "MAXIMUM")): 1352 self._match_text_seq("DATABLOCKSIZE") 1353 return self.expression(exp.DataBlocksizeProperty, min=False) 1354 1355 self._match_text_seq("DATABLOCKSIZE") 1356 self._match(TokenType.EQ) 1357 size = self._parse_number() 1358 units = None 1359 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1360 units = self._prev.text 1361 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1362 1363 def _parse_blockcompression(self) -> exp.Expression: 1364 self._match_text_seq("BLOCKCOMPRESSION") 1365 self._match(TokenType.EQ) 1366 always = self._match_text_seq("ALWAYS") 1367 manual = self._match_text_seq("MANUAL") 1368 never = self._match_text_seq("NEVER") 1369 default = self._match_text_seq("DEFAULT") 1370 autotemp = None 1371 if self._match_text_seq("AUTOTEMP"): 1372 autotemp = self._parse_schema() 1373 1374 return self.expression( 1375 exp.BlockCompressionProperty, 1376 always=always, 1377 manual=manual, 1378 never=never, 1379 default=default, 1380 autotemp=autotemp, 1381 ) 1382 1383 def _parse_withisolatedloading(self) -> exp.Expression: 1384 no = self._match_text_seq("NO") 1385 concurrent = self._match_text_seq("CONCURRENT") 1386 self._match_text_seq("ISOLATED", "LOADING") 1387 for_all = self._match_text_seq("FOR", "ALL") 1388 for_insert = self._match_text_seq("FOR", "INSERT") 1389 for_none = self._match_text_seq("FOR", "NONE") 1390 return self.expression( 1391 exp.IsolatedLoadingProperty, 1392 no=no, 1393 concurrent=concurrent, 1394 for_all=for_all, 1395 for_insert=for_insert, 1396 for_none=for_none, 1397 ) 1398 1399 def _parse_locking(self) -> exp.Expression: 1400 if self._match(TokenType.TABLE): 1401 kind = "TABLE" 1402 elif self._match(TokenType.VIEW): 1403 kind = "VIEW" 1404 elif 
self._match(TokenType.ROW): 1405 kind = "ROW" 1406 elif self._match_text_seq("DATABASE"): 1407 kind = "DATABASE" 1408 else: 1409 kind = None 1410 1411 if kind in ("DATABASE", "TABLE", "VIEW"): 1412 this = self._parse_table_parts() 1413 else: 1414 this = None 1415 1416 if self._match(TokenType.FOR): 1417 for_or_in = "FOR" 1418 elif self._match(TokenType.IN): 1419 for_or_in = "IN" 1420 else: 1421 for_or_in = None 1422 1423 if self._match_text_seq("ACCESS"): 1424 lock_type = "ACCESS" 1425 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1426 lock_type = "EXCLUSIVE" 1427 elif self._match_text_seq("SHARE"): 1428 lock_type = "SHARE" 1429 elif self._match_text_seq("READ"): 1430 lock_type = "READ" 1431 elif self._match_text_seq("WRITE"): 1432 lock_type = "WRITE" 1433 elif self._match_text_seq("CHECKSUM"): 1434 lock_type = "CHECKSUM" 1435 else: 1436 lock_type = None 1437 1438 override = self._match_text_seq("OVERRIDE") 1439 1440 return self.expression( 1441 exp.LockingProperty, 1442 this=this, 1443 kind=kind, 1444 for_or_in=for_or_in, 1445 lock_type=lock_type, 1446 override=override, 1447 ) 1448 1449 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1450 if self._match(TokenType.PARTITION_BY): 1451 return self._parse_csv(self._parse_conjunction) 1452 return [] 1453 1454 def _parse_partitioned_by(self) -> exp.Expression: 1455 self._match(TokenType.EQ) 1456 return self.expression( 1457 exp.PartitionedByProperty, 1458 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1459 ) 1460 1461 def _parse_withdata(self, no=False) -> exp.Expression: 1462 if self._match_text_seq("AND", "STATISTICS"): 1463 statistics = True 1464 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1465 statistics = False 1466 else: 1467 statistics = None 1468 1469 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1470 1471 def _parse_noprimaryindex(self) -> exp.Expression: 1472 self._match_text_seq("PRIMARY", "INDEX") 1473 return 
exp.NoPrimaryIndexProperty() 1474 1475 def _parse_oncommit(self) -> exp.Expression: 1476 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1477 return exp.OnCommitProperty() 1478 1479 def _parse_distkey(self) -> exp.Expression: 1480 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1481 1482 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1483 table = self._parse_table(schema=True) 1484 options = [] 1485 while self._match_texts(("INCLUDING", "EXCLUDING")): 1486 this = self._prev.text.upper() 1487 id_var = self._parse_id_var() 1488 1489 if not id_var: 1490 return None 1491 1492 options.append( 1493 self.expression( 1494 exp.Property, 1495 this=this, 1496 value=exp.Var(this=id_var.this.upper()), 1497 ) 1498 ) 1499 return self.expression(exp.LikeProperty, this=table, expressions=options) 1500 1501 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1502 return self.expression( 1503 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1504 ) 1505 1506 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1507 self._match(TokenType.EQ) 1508 return self.expression( 1509 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1510 ) 1511 1512 def _parse_returns(self) -> exp.Expression: 1513 value: t.Optional[exp.Expression] 1514 is_table = self._match(TokenType.TABLE) 1515 1516 if is_table: 1517 if self._match(TokenType.LT): 1518 value = self.expression( 1519 exp.Schema, 1520 this="TABLE", 1521 expressions=self._parse_csv(self._parse_struct_kwargs), 1522 ) 1523 if not self._match(TokenType.GT): 1524 self.raise_error("Expecting >") 1525 else: 1526 value = self._parse_schema(exp.Var(this="TABLE")) 1527 else: 1528 value = self._parse_types() 1529 1530 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1531 1532 def _parse_temporary(self, global_=False) -> exp.Expression: 1533 self._match(TokenType.TEMPORARY) # 
in case calling from "GLOBAL" 1534 return self.expression(exp.TemporaryProperty, global_=global_) 1535 1536 def _parse_describe(self) -> exp.Expression: 1537 kind = self._match_set(self.CREATABLES) and self._prev.text 1538 this = self._parse_table() 1539 1540 return self.expression(exp.Describe, this=this, kind=kind) 1541 1542 def _parse_insert(self) -> exp.Expression: 1543 overwrite = self._match(TokenType.OVERWRITE) 1544 local = self._match(TokenType.LOCAL) 1545 alternative = None 1546 1547 if self._match_text_seq("DIRECTORY"): 1548 this: t.Optional[exp.Expression] = self.expression( 1549 exp.Directory, 1550 this=self._parse_var_or_string(), 1551 local=local, 1552 row_format=self._parse_row_format(match_row=True), 1553 ) 1554 else: 1555 if self._match(TokenType.OR): 1556 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1557 1558 self._match(TokenType.INTO) 1559 self._match(TokenType.TABLE) 1560 this = self._parse_table(schema=True) 1561 1562 return self.expression( 1563 exp.Insert, 1564 this=this, 1565 exists=self._parse_exists(), 1566 partition=self._parse_partition(), 1567 expression=self._parse_ddl_select(), 1568 returning=self._parse_returning(), 1569 overwrite=overwrite, 1570 alternative=alternative, 1571 ) 1572 1573 def _parse_returning(self) -> t.Optional[exp.Expression]: 1574 if not self._match(TokenType.RETURNING): 1575 return None 1576 1577 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1578 1579 def _parse_row(self) -> t.Optional[exp.Expression]: 1580 if not self._match(TokenType.FORMAT): 1581 return None 1582 return self._parse_row_format() 1583 1584 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1585 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1586 return None 1587 1588 if self._match_text_seq("SERDE"): 1589 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1590 1591 
self._match_text_seq("DELIMITED") 1592 1593 kwargs = {} 1594 1595 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1596 kwargs["fields"] = self._parse_string() 1597 if self._match_text_seq("ESCAPED", "BY"): 1598 kwargs["escaped"] = self._parse_string() 1599 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1600 kwargs["collection_items"] = self._parse_string() 1601 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1602 kwargs["map_keys"] = self._parse_string() 1603 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1604 kwargs["lines"] = self._parse_string() 1605 if self._match_text_seq("NULL", "DEFINED", "AS"): 1606 kwargs["null"] = self._parse_string() 1607 1608 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1609 1610 def _parse_load_data(self) -> exp.Expression: 1611 local = self._match(TokenType.LOCAL) 1612 self._match_text_seq("INPATH") 1613 inpath = self._parse_string() 1614 overwrite = self._match(TokenType.OVERWRITE) 1615 self._match_pair(TokenType.INTO, TokenType.TABLE) 1616 1617 return self.expression( 1618 exp.LoadData, 1619 this=self._parse_table(schema=True), 1620 local=local, 1621 overwrite=overwrite, 1622 inpath=inpath, 1623 partition=self._parse_partition(), 1624 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1625 serde=self._match_text_seq("SERDE") and self._parse_string(), 1626 ) 1627 1628 def _parse_delete(self) -> exp.Expression: 1629 self._match(TokenType.FROM) 1630 1631 return self.expression( 1632 exp.Delete, 1633 this=self._parse_table(schema=True), 1634 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1635 where=self._parse_where(), 1636 returning=self._parse_returning(), 1637 ) 1638 1639 def _parse_update(self) -> exp.Expression: 1640 return self.expression( 1641 exp.Update, 1642 **{ # type: ignore 1643 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1644 "expressions": 
self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1645 "from": self._parse_from(), 1646 "where": self._parse_where(), 1647 "returning": self._parse_returning(), 1648 }, 1649 ) 1650 1651 def _parse_uncache(self) -> exp.Expression: 1652 if not self._match(TokenType.TABLE): 1653 self.raise_error("Expecting TABLE after UNCACHE") 1654 1655 return self.expression( 1656 exp.Uncache, 1657 exists=self._parse_exists(), 1658 this=self._parse_table(schema=True), 1659 ) 1660 1661 def _parse_cache(self) -> exp.Expression: 1662 lazy = self._match(TokenType.LAZY) 1663 self._match(TokenType.TABLE) 1664 table = self._parse_table(schema=True) 1665 options = [] 1666 1667 if self._match(TokenType.OPTIONS): 1668 self._match_l_paren() 1669 k = self._parse_string() 1670 self._match(TokenType.EQ) 1671 v = self._parse_string() 1672 options = [k, v] 1673 self._match_r_paren() 1674 1675 self._match(TokenType.ALIAS) 1676 return self.expression( 1677 exp.Cache, 1678 this=table, 1679 lazy=lazy, 1680 options=options, 1681 expression=self._parse_select(nested=True), 1682 ) 1683 1684 def _parse_partition(self) -> t.Optional[exp.Expression]: 1685 if not self._match(TokenType.PARTITION): 1686 return None 1687 1688 return self.expression( 1689 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1690 ) 1691 1692 def _parse_value(self) -> exp.Expression: 1693 if self._match(TokenType.L_PAREN): 1694 expressions = self._parse_csv(self._parse_conjunction) 1695 self._match_r_paren() 1696 return self.expression(exp.Tuple, expressions=expressions) 1697 1698 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1699 # Source: https://prestodb.io/docs/current/sql/values.html 1700 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1701 1702 def _parse_select( 1703 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1704 ) -> t.Optional[exp.Expression]: 1705 cte = self._parse_with() 1706 if cte: 1707 this = self._parse_statement() 1708 1709 if not this: 1710 self.raise_error("Failed to parse any statement following CTE") 1711 return cte 1712 1713 if "with" in this.arg_types: 1714 this.set("with", cte) 1715 else: 1716 self.raise_error(f"{this.key} does not support CTE") 1717 this = cte 1718 elif self._match(TokenType.SELECT): 1719 comments = self._prev_comments 1720 1721 kind = ( 1722 self._match(TokenType.ALIAS) 1723 and self._match_texts(("STRUCT", "VALUE")) 1724 and self._prev.text 1725 ) 1726 hint = self._parse_hint() 1727 all_ = self._match(TokenType.ALL) 1728 distinct = self._match(TokenType.DISTINCT) 1729 1730 if distinct: 1731 distinct = self.expression( 1732 exp.Distinct, 1733 on=self._parse_value() if self._match(TokenType.ON) else None, 1734 ) 1735 1736 if all_ and distinct: 1737 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1738 1739 limit = self._parse_limit(top=True) 1740 expressions = self._parse_csv(self._parse_expression) 1741 1742 this = self.expression( 1743 exp.Select, 1744 kind=kind, 1745 hint=hint, 1746 distinct=distinct, 1747 expressions=expressions, 1748 limit=limit, 1749 ) 1750 this.comments = comments 1751 1752 into = self._parse_into() 1753 if into: 1754 this.set("into", into) 1755 1756 from_ = self._parse_from() 1757 if from_: 1758 this.set("from", from_) 1759 1760 self._parse_query_modifiers(this) 1761 elif (table or nested) and self._match(TokenType.L_PAREN): 1762 this = self._parse_table() if table else self._parse_select(nested=True) 1763 self._parse_query_modifiers(this) 1764 this = self._parse_set_operations(this) 1765 self._match_r_paren() 1766 1767 # early return 
so that subquery unions aren't parsed again 1768 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1769 # Union ALL should be a property of the top select node, not the subquery 1770 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1771 elif self._match(TokenType.VALUES): 1772 this = self.expression( 1773 exp.Values, 1774 expressions=self._parse_csv(self._parse_value), 1775 alias=self._parse_table_alias(), 1776 ) 1777 else: 1778 this = None 1779 1780 return self._parse_set_operations(this) 1781 1782 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1783 if not skip_with_token and not self._match(TokenType.WITH): 1784 return None 1785 1786 recursive = self._match(TokenType.RECURSIVE) 1787 1788 expressions = [] 1789 while True: 1790 expressions.append(self._parse_cte()) 1791 1792 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1793 break 1794 else: 1795 self._match(TokenType.WITH) 1796 1797 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1798 1799 def _parse_cte(self) -> exp.Expression: 1800 alias = self._parse_table_alias() 1801 if not alias or not alias.this: 1802 self.raise_error("Expected CTE to have alias") 1803 1804 self._match(TokenType.ALIAS) 1805 1806 return self.expression( 1807 exp.CTE, 1808 this=self._parse_wrapped(self._parse_statement), 1809 alias=alias, 1810 ) 1811 1812 def _parse_table_alias( 1813 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1814 ) -> t.Optional[exp.Expression]: 1815 any_token = self._match(TokenType.ALIAS) 1816 alias = ( 1817 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1818 or self._parse_string_as_identifier() 1819 ) 1820 1821 index = self._index 1822 if self._match(TokenType.L_PAREN): 1823 columns = self._parse_csv(self._parse_function_parameter) 1824 self._match_r_paren() if columns else self._retreat(index) 1825 else: 1826 columns = None 1827 1828 if not alias and 
not columns: 1829 return None 1830 1831 return self.expression(exp.TableAlias, this=alias, columns=columns) 1832 1833 def _parse_subquery( 1834 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1835 ) -> exp.Expression: 1836 return self.expression( 1837 exp.Subquery, 1838 this=this, 1839 pivots=self._parse_pivots(), 1840 alias=self._parse_table_alias() if parse_alias else None, 1841 ) 1842 1843 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1844 if not isinstance(this, self.MODIFIABLES): 1845 return 1846 1847 table = isinstance(this, exp.Table) 1848 1849 while True: 1850 lateral = self._parse_lateral() 1851 join = self._parse_join() 1852 comma = None if table else self._match(TokenType.COMMA) 1853 if lateral: 1854 this.append("laterals", lateral) 1855 if join: 1856 this.append("joins", join) 1857 if comma: 1858 this.args["from"].append("expressions", self._parse_table()) 1859 if not (lateral or join or comma): 1860 break 1861 1862 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1863 expression = parser(self) 1864 1865 if expression: 1866 this.set(key, expression) 1867 1868 def _parse_hint(self) -> t.Optional[exp.Expression]: 1869 if self._match(TokenType.HINT): 1870 hints = self._parse_csv(self._parse_function) 1871 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1872 self.raise_error("Expected */ after HINT") 1873 return self.expression(exp.Hint, expressions=hints) 1874 1875 return None 1876 1877 def _parse_into(self) -> t.Optional[exp.Expression]: 1878 if not self._match(TokenType.INTO): 1879 return None 1880 1881 temp = self._match(TokenType.TEMPORARY) 1882 unlogged = self._match(TokenType.UNLOGGED) 1883 self._match(TokenType.TABLE) 1884 1885 return self.expression( 1886 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1887 ) 1888 1889 def _parse_from(self) -> t.Optional[exp.Expression]: 1890 if not self._match(TokenType.FROM): 1891 return None 1892 1893 return 
self.expression( 1894 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1895 ) 1896 1897 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1898 if not self._match(TokenType.MATCH_RECOGNIZE): 1899 return None 1900 self._match_l_paren() 1901 1902 partition = self._parse_partition_by() 1903 order = self._parse_order() 1904 measures = ( 1905 self._parse_alias(self._parse_conjunction()) 1906 if self._match_text_seq("MEASURES") 1907 else None 1908 ) 1909 1910 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1911 rows = exp.Var(this="ONE ROW PER MATCH") 1912 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1913 text = "ALL ROWS PER MATCH" 1914 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1915 text += f" SHOW EMPTY MATCHES" 1916 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1917 text += f" OMIT EMPTY MATCHES" 1918 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1919 text += f" WITH UNMATCHED ROWS" 1920 rows = exp.Var(this=text) 1921 else: 1922 rows = None 1923 1924 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1925 text = "AFTER MATCH SKIP" 1926 if self._match_text_seq("PAST", "LAST", "ROW"): 1927 text += f" PAST LAST ROW" 1928 elif self._match_text_seq("TO", "NEXT", "ROW"): 1929 text += f" TO NEXT ROW" 1930 elif self._match_text_seq("TO", "FIRST"): 1931 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1932 elif self._match_text_seq("TO", "LAST"): 1933 text += f" TO LAST {self._advance_any().text}" # type: ignore 1934 after = exp.Var(this=text) 1935 else: 1936 after = None 1937 1938 if self._match_text_seq("PATTERN"): 1939 self._match_l_paren() 1940 1941 if not self._curr: 1942 self.raise_error("Expecting )", self._curr) 1943 1944 paren = 1 1945 start = self._curr 1946 1947 while self._curr and paren > 0: 1948 if self._curr.token_type == TokenType.L_PAREN: 1949 paren += 1 1950 if self._curr.token_type == TokenType.R_PAREN: 1951 paren -= 1 1952 end = self._prev 1953 
self._advance() 1954 if paren > 0: 1955 self.raise_error("Expecting )", self._curr) 1956 pattern = exp.Var(this=self._find_sql(start, end)) 1957 else: 1958 pattern = None 1959 1960 define = ( 1961 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1962 ) 1963 self._match_r_paren() 1964 1965 return self.expression( 1966 exp.MatchRecognize, 1967 partition_by=partition, 1968 order=order, 1969 measures=measures, 1970 rows=rows, 1971 after=after, 1972 pattern=pattern, 1973 define=define, 1974 ) 1975 1976 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1977 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1978 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1979 1980 if outer_apply or cross_apply: 1981 this = self._parse_select(table=True) 1982 view = None 1983 outer = not cross_apply 1984 elif self._match(TokenType.LATERAL): 1985 this = self._parse_select(table=True) 1986 view = self._match(TokenType.VIEW) 1987 outer = self._match(TokenType.OUTER) 1988 else: 1989 return None 1990 1991 if not this: 1992 this = self._parse_function() or self._parse_id_var(any_token=False) 1993 while self._match(TokenType.DOT): 1994 this = exp.Dot( 1995 this=this, 1996 expression=self._parse_function() or self._parse_id_var(any_token=False), 1997 ) 1998 1999 table_alias: t.Optional[exp.Expression] 2000 2001 if view: 2002 table = self._parse_id_var(any_token=False) 2003 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2004 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2005 else: 2006 table_alias = self._parse_table_alias() 2007 2008 expression = self.expression( 2009 exp.Lateral, 2010 this=this, 2011 view=view, 2012 outer=outer, 2013 alias=table_alias, 2014 ) 2015 2016 if outer_apply or cross_apply: 2017 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 2018 2019 return expression 2020 2021 def 
_parse_join_side_and_kind( 2022 self, 2023 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2024 return ( 2025 self._match(TokenType.NATURAL) and self._prev, 2026 self._match_set(self.JOIN_SIDES) and self._prev, 2027 self._match_set(self.JOIN_KINDS) and self._prev, 2028 ) 2029 2030 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2031 natural, side, kind = self._parse_join_side_and_kind() 2032 2033 if not skip_join_token and not self._match(TokenType.JOIN): 2034 return None 2035 2036 kwargs: t.Dict[ 2037 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2038 ] = {"this": self._parse_table()} 2039 2040 if natural: 2041 kwargs["natural"] = True 2042 if side: 2043 kwargs["side"] = side.text 2044 if kind: 2045 kwargs["kind"] = kind.text 2046 2047 if self._match(TokenType.ON): 2048 kwargs["on"] = self._parse_conjunction() 2049 elif self._match(TokenType.USING): 2050 kwargs["using"] = self._parse_wrapped_id_vars() 2051 2052 return self.expression(exp.Join, **kwargs) # type: ignore 2053 2054 def _parse_index(self) -> exp.Expression: 2055 index = self._parse_id_var() 2056 self._match(TokenType.ON) 2057 self._match(TokenType.TABLE) # hive 2058 2059 return self.expression( 2060 exp.Index, 2061 this=index, 2062 table=self.expression(exp.Table, this=self._parse_id_var()), 2063 columns=self._parse_expression(), 2064 ) 2065 2066 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2067 unique = self._match(TokenType.UNIQUE) 2068 primary = self._match_text_seq("PRIMARY") 2069 amp = self._match_text_seq("AMP") 2070 if not self._match(TokenType.INDEX): 2071 return None 2072 index = self._parse_id_var() 2073 columns = None 2074 if self._match(TokenType.L_PAREN, advance=False): 2075 columns = self._parse_wrapped_csv(self._parse_column) 2076 return self.expression( 2077 exp.Index, 2078 this=index, 2079 columns=columns, 2080 unique=unique, 2081 primary=primary, 2082 amp=amp, 2083 ) 

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly qualified table name (catalog.db.table...) into exp.Table."""
        catalog = None
        db = None

        table = (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
        )

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift qualifiers left: previous table becomes db, previous db becomes catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause table factor.

        Tries, in order: LATERAL/APPLY, UNNEST, VALUES, a subquery, and finally
        a plain (possibly qualified) table name with optional alias, hints,
        pivots and TABLESAMPLE.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias; `alias_post_tablesample`
        # selects which order is consumed. Exactly one of the two branches runs,
        # so `table_sample` is always bound below.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints, e.g. T-SQL `WITH (NOLOCK)`.
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET ...]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects like BigQuery alias the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        """Parse a VALUES list, optionally wrapped in parens as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)

        if is_derived:
            self._match_r_paren()

        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
        """Parse TABLESAMPLE / USING SAMPLE into an exp.TableSample."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind =
"TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2222 method = self._parse_var(tokens=(TokenType.ROW,)) 2223 2224 self._match(TokenType.L_PAREN) 2225 2226 num = self._parse_number() 2227 2228 if self._match(TokenType.BUCKET): 2229 bucket_numerator = self._parse_number() 2230 self._match(TokenType.OUT_OF) 2231 bucket_denominator = bucket_denominator = self._parse_number() 2232 self._match(TokenType.ON) 2233 bucket_field = self._parse_field() 2234 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2235 percent = num 2236 elif self._match(TokenType.ROWS): 2237 rows = num 2238 else: 2239 size = num 2240 2241 self._match(TokenType.R_PAREN) 2242 2243 if self._match(TokenType.L_PAREN): 2244 method = self._parse_var() 2245 seed = self._match(TokenType.COMMA) and self._parse_number() 2246 self._match_r_paren() 2247 elif self._match_texts(("SEED", "REPEATABLE")): 2248 seed = self._parse_wrapped(self._parse_number) 2249 2250 return self.expression( 2251 exp.TableSample, 2252 method=method, 2253 bucket_numerator=bucket_numerator, 2254 bucket_denominator=bucket_denominator, 2255 bucket_field=bucket_field, 2256 percent=percent, 2257 rows=rows, 2258 size=size, 2259 seed=seed, 2260 kind=kind, 2261 ) 2262 2263 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2264 return list(iter(self._parse_pivot, None)) 2265 2266 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2267 index = self._index 2268 2269 if self._match(TokenType.PIVOT): 2270 unpivot = False 2271 elif self._match(TokenType.UNPIVOT): 2272 unpivot = True 2273 else: 2274 return None 2275 2276 expressions = [] 2277 field = None 2278 2279 if not self._match(TokenType.L_PAREN): 2280 self._retreat(index) 2281 return None 2282 2283 if unpivot: 2284 expressions = self._parse_csv(self._parse_column) 2285 else: 2286 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2287 2288 if not self._match(TokenType.FOR): 2289 self.raise_error("Expecting 
FOR") 2290 2291 value = self._parse_column() 2292 2293 if not self._match(TokenType.IN): 2294 self.raise_error("Expecting IN") 2295 2296 field = self._parse_in(value) 2297 2298 self._match_r_paren() 2299 2300 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2301 2302 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2303 pivot.set("alias", self._parse_table_alias()) 2304 2305 return pivot 2306 2307 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2308 if not skip_where_token and not self._match(TokenType.WHERE): 2309 return None 2310 2311 return self.expression( 2312 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2313 ) 2314 2315 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2316 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2317 return None 2318 2319 elements = defaultdict(list) 2320 2321 while True: 2322 expressions = self._parse_csv(self._parse_conjunction) 2323 if expressions: 2324 elements["expressions"].extend(expressions) 2325 2326 grouping_sets = self._parse_grouping_sets() 2327 if grouping_sets: 2328 elements["grouping_sets"].extend(grouping_sets) 2329 2330 rollup = None 2331 cube = None 2332 2333 with_ = self._match(TokenType.WITH) 2334 if self._match(TokenType.ROLLUP): 2335 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2336 elements["rollup"].extend(ensure_list(rollup)) 2337 2338 if self._match(TokenType.CUBE): 2339 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2340 elements["cube"].extend(ensure_list(cube)) 2341 2342 if not (expressions or grouping_sets or rollup or cube): 2343 break 2344 2345 return self.expression(exp.Group, **elements) # type: ignore 2346 2347 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2348 if not self._match(TokenType.GROUPING_SETS): 2349 return None 2350 2351 return 
self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause, or return None if absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause, or return None if absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; passes `this` through unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Generic parser for ORDER-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term with optional ASC/DESC and NULLS FIRST/LAST.

        When null ordering is not explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting so output can be normalized.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) or a FETCH FIRST/NEXT clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP may be parenthesized, e.g. T-SQL `TOP (10)`.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; COMMA supports `LIMIT x, y` style."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a chained UNION / EXCEPT / INTERSECT following `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union

elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, <=, >, >=)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE etc., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / boolean."""
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM is null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right side of IN: an UNNEST, a list/subquery, or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery is stored as `query`, anything else as a list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse the `low AND high` tail of a BETWEEN predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an exp.Escape if an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this

        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> built from LT/GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, falling through to typed/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals or `TYPE 'literal'` style implicit casts.

        Backtracks when a potential type token turns out to be a plain column.
        """
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2021-01-01' is treated as CAST.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested, parametrized and array forms."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not
self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters, e.g. DECIMAL(10, 2) or MAP(a, b).
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a type after all, backtrack.
                self._retreat(index)
                return None

            self._match_r_paren()
            # Could also be a function call spelled like a type, e.g. DATE(...).
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types, e.g. INT[][] — each [] wraps another ARRAY.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this was an index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Optional literal values, e.g. ARRAY<INT>[1, 2].
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants to concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a following string literal: `DATE(...) 'x'` would make
            # this a function call rather than a type, so reject the type parse.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: either a bare type or `name [:] type`."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dots, ::casts and bracket access."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # JSON-style operators take the next token as a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dotted part: shift qualifiers (table -> db -> catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, 0.x number, or parenthesized form."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
                return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers written without a leading zero, e.g. `.25`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            self._match_r_paren()

            # Preserve comments that preceded the opening paren.
            if this and comments:
                this.comments =
comments

            return this

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call (with or without parentheses).

        `functions` optionally overrides the dialect's FUNCTIONS registry.
        Unknown functions become exp.Anonymous nodes.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens: only keyword functions like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'x'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally kind-qualified via a dot."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized column-def/constraint list into exp.Schema.

        Returns `this` unchanged if no "(" follows, or if the "(" opens a
        subquery (SELECT) rather than a schema.
        """
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            # "( SELECT ..." is a subquery, not a schema -- rewind and bail out.
            self._retreat(index)
            return this

        # Each CSV item is either a table constraint or a column definition.
        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type plus zero or more constraints.

        Returns `this` unchanged when neither a type nor any constraint follows.
        """
        kind = self._parse_types()

        # e.g. "ordinality FOR ORDINALITY" in table-function projections.
        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) arguments.

        When both start and increment are given, the constraint is represented
        as a generated-identity constraint instead.
        """
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # AUTOINCREMENT(start, increment)
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # AUTOINCREMENT START <expr> INCREMENT <expr>
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint, wrapped list or single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)].

        `this` is True for ALWAYS and False for BY DEFAULT.
        """
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Optional sequence options; each clause is independent.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single column constraint, optionally named via CONSTRAINT <id>."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            # Dispatch on the constraint keyword that was just consumed.
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint; unnamed ones are delegated."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several clauses (or function-like forms).
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint, dispatching on CONSTRAINT_PARSERS.

        `constraints` optionally restricts the keywords that may start one.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, as a bare column constraint or with a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint option strings (ON <event> <action>, DEFERRABLE, ...)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event keyword (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(cols)] [options]; None if REFERENCES absent."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action> ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" or "update" becomes a keyword argument on exp.ForeignKey.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single-token action (e.g. CASCADE / RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint or a (cols) table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            # No column list -> this is a column-level constraint.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [..] / {..} suffixes: subscripts, slices, array and struct literals.

        Recurses to support chained brackets, e.g. x[0][1].
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon means a slice with no start, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indexes to the dialect's offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a slice if a ":" follows, otherwise return it as-is."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END (window-aware)."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call IF(...) or as IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT arguments: <unit> FROM <expr>, or a comma-separated form."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST/TRY_CAST arguments: <expr> AS <type> [CHARACTER SET ...].

        `strict` selects exp.Cast (errors on failure) vs exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may be qualified with a character set, e.g. CAST(x AS CHAR CHARACTER SET utf8).
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG arguments, normalizing the result to exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: <expr> USING <charset> or <expr>, <type>."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one JSON_OBJECT entry: [KEY] <key> [:|VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments, including NULL handling, UNIQUE KEYS,
        RETURNING type, FORMAT JSON and ENCODING clauses."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style arguments into exp.StrPosition.

        `haystack_first` flips the comma-form argument order for dialects
        whose first argument is the string being searched.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into exp.JoinHint (name upper-cased)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first term was the removal set.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's named-window list, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE|RESPECT NULLS / OVER suffixes on `this`.

        With `alias=True`, parses a named-window definition (`name AS (...)`) instead
        of requiring OVER.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (a reference to a named window).
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS|RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint into {"value": ..., "side": ...}.

        "value" is UNBOUNDED / CURRENT ROW text or an expression; "side" is
        PRECEDING / FOLLOWING text, if present.
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach an alias (or parenthesized alias list) to `this`.

        With `explicit=True`, only an AS-prefixed alias is accepted.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name.

        Falls back from a quoted identifier to any non-reserved token
        (`any_token`) or to the given token set; `prefix_tokens` are glued
        onto the front of the resulting name.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and reinterpret it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()
    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved token / given token set) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a var or a string literal, whichever matches first."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, if present."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, if present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, if present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Dispatch to a placeholder parser; rewinds if the parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (col, ...) / EXCEPT col, ... projection modifier."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr, ...) / REPLACE expr, ... projection modifier."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`; drops None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators: map each matched token to its expression type."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))
sep=sep)) 3711 3712 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3713 self._match_l_paren() 3714 parse_result = parse_method() 3715 self._match_r_paren() 3716 return parse_result 3717 3718 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3719 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 3720 3721 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3722 return self._parse_set_operations( 3723 self._parse_select(nested=True, parse_subquery_alias=False) 3724 ) 3725 3726 def _parse_transaction(self) -> exp.Expression: 3727 this = None 3728 if self._match_texts(self.TRANSACTION_KIND): 3729 this = self._prev.text 3730 3731 self._match_texts({"TRANSACTION", "WORK"}) 3732 3733 modes = [] 3734 while True: 3735 mode = [] 3736 while self._match(TokenType.VAR): 3737 mode.append(self._prev.text) 3738 3739 if mode: 3740 modes.append(" ".join(mode)) 3741 if not self._match(TokenType.COMMA): 3742 break 3743 3744 return self.expression(exp.Transaction, this=this, modes=modes) 3745 3746 def _parse_commit_or_rollback(self) -> exp.Expression: 3747 chain = None 3748 savepoint = None 3749 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3750 3751 self._match_texts({"TRANSACTION", "WORK"}) 3752 3753 if self._match_text_seq("TO"): 3754 self._match_text_seq("SAVEPOINT") 3755 savepoint = self._parse_id_var() 3756 3757 if self._match(TokenType.AND): 3758 chain = not self._match_text_seq("NO") 3759 self._match_text_seq("CHAIN") 3760 3761 if is_rollback: 3762 return self.expression(exp.Rollback, savepoint=savepoint) 3763 return self.expression(exp.Commit, chain=chain) 3764 3765 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3766 if not self._match_text_seq("ADD"): 3767 return None 3768 3769 self._match(TokenType.COLUMN) 3770 exists_column = self._parse_exists(not_=True) 3771 expression = self._parse_column_def(self._parse_field(any_token=True)) 3772 3773 if expression: 3774 
expression.set("exists", exists_column) 3775 3776 return expression 3777 3778 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3779 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3780 3781 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3782 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3783 return self.expression( 3784 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3785 ) 3786 3787 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3788 this = None 3789 kind = self._prev.token_type 3790 3791 if kind == TokenType.CONSTRAINT: 3792 this = self._parse_id_var() 3793 3794 if self._match_text_seq("CHECK"): 3795 expression = self._parse_wrapped(self._parse_conjunction) 3796 enforced = self._match_text_seq("ENFORCED") 3797 3798 return self.expression( 3799 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3800 ) 3801 3802 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3803 expression = self._parse_foreign_key() 3804 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3805 expression = self._parse_primary_key() 3806 3807 return self.expression(exp.AddConstraint, this=this, expression=expression) 3808 3809 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3810 index = self._index - 1 3811 3812 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3813 return self._parse_csv(self._parse_add_constraint) 3814 3815 self._retreat(index) 3816 return self._parse_csv(self._parse_add_column) 3817 3818 def _parse_alter_table_alter(self) -> exp.Expression: 3819 self._match(TokenType.COLUMN) 3820 column = self._parse_field(any_token=True) 3821 3822 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3823 return self.expression(exp.AlterColumn, this=column, drop=True) 3824 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3825 return 
    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP actions of an ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop -> rewind and parse as column drops.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER statement; unsupported variants fall back to exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # Dispatch on the action keyword (ADD / DROP / ALTER / RENAME / ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=ensure_list(parser(self)),
            )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # False for BY TARGET, True for BY SOURCE, None when unspecified.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the SHOW_PARSERS trie, else a generic exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment: <name> {= | TO} <value>; None if not one."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # No assignment operator -> rewind so the caller can retry.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the SET_PARSERS trie, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; falls back to exp.Command on trailing tokens."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            # Leftover tokens mean we couldn't fully parse it -- keep it raw.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of the given multi-word options and return it as exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as an opaque exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the remainder of the raw SQL.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])
4024 self._advance() 4025 result, trie = in_trie(trie, key) 4026 if result == 0: 4027 break 4028 if result == 2: 4029 subparser = parsers[" ".join(this)] 4030 return subparser 4031 self._retreat(index) 4032 return None 4033 4034 def _match(self, token_type, advance=True): 4035 if not self._curr: 4036 return None 4037 4038 if self._curr.token_type == token_type: 4039 if advance: 4040 self._advance() 4041 return True 4042 4043 return None 4044 4045 def _match_set(self, types, advance=True): 4046 if not self._curr: 4047 return None 4048 4049 if self._curr.token_type in types: 4050 if advance: 4051 self._advance() 4052 return True 4053 4054 return None 4055 4056 def _match_pair(self, token_type_a, token_type_b, advance=True): 4057 if not self._curr or not self._next: 4058 return None 4059 4060 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4061 if advance: 4062 self._advance(2) 4063 return True 4064 4065 return None 4066 4067 def _match_l_paren(self, expression=None): 4068 if not self._match(TokenType.L_PAREN): 4069 self.raise_error("Expecting (") 4070 if expression and self._prev_comments: 4071 expression.comments = self._prev_comments 4072 4073 def _match_r_paren(self, expression=None): 4074 if not self._match(TokenType.R_PAREN): 4075 self.raise_error("Expecting )") 4076 if expression and self._prev_comments: 4077 expression.comments = self._prev_comments 4078 4079 def _match_texts(self, texts, advance=True): 4080 if self._curr and self._curr.text.upper() in texts: 4081 if advance: 4082 self._advance() 4083 return True 4084 return False 4085 4086 def _match_text_seq(self, *texts, advance=True): 4087 index = self._index 4088 for text in texts: 4089 if self._curr and self._curr.text.upper() == text: 4090 self._advance() 4091 else: 4092 self._retreat(index) 4093 return False 4094 4095 if not advance: 4096 self._retreat(index) 4097 4098 return True 4099 4100 def _replace_columns_with_dots(self, this): 4101 if isinstance(this, 
exp.Dot): 4102 exp.replace_children(this, self._replace_columns_with_dots) 4103 elif isinstance(this, exp.Column): 4104 exp.replace_children(this, self._replace_columns_with_dots) 4105 table = this.args.get("table") 4106 this = ( 4107 self.expression(exp.Dot, this=table, expression=this.this) 4108 if table 4109 else self.expression(exp.Var, this=this.name) 4110 ) 4111 elif isinstance(this, exp.Identifier): 4112 this = self.expression(exp.Var, this=this.name) 4113 return this 4114 4115 def _replace_lambda(self, node, lambda_variables): 4116 for column in node.find_all(exp.Column): 4117 if column.parts[0].name in lambda_variables: 4118 dot_or_id = column.to_dot() if column.table else column.this 4119 parent = column.parent 4120 4121 while isinstance(parent, exp.Dot): 4122 if not isinstance(parent.parent, exp.Dot): 4123 parent.replace(dot_or_id) 4124 break 4125 parent = parent.parent 4126 else: 4127 column.replace(dot_or_id) 4128 return node
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE (what `__init__` falls back to when none is given).
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Maps a function's SQL name to a builder that constructs the corresponding
    # expression from a raw argument list. Seeded from every registered
    # exp.Func subclass, then overridden/extended with the special cases below.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        # DATE_TO_DATE_STR(x) -> CAST(x AS TEXT)
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # NOTE: built with the argument order swapped relative to the arg list
        # (arg 1 becomes `this`, arg 0 becomes `expression`).
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        # TIME_TO_TIME_STR(x) -> CAST(x AS TEXT)
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS TEXT), 1, 10),
        # i.e. keep only the date part of a timestamp-ish string.
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Keyword-style functions that may appear without parentheses.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate (not a datetime
    # expression) — confirm this is intentional.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.MAP, 114 TokenType.STRUCT, 115 TokenType.NULLABLE, 116 } 117 118 TYPE_TOKENS = { 119 TokenType.BIT, 120 TokenType.BOOLEAN, 121 TokenType.TINYINT, 122 TokenType.SMALLINT, 123 TokenType.INT, 124 TokenType.BIGINT, 125 TokenType.FLOAT, 126 TokenType.DOUBLE, 127 TokenType.CHAR, 128 TokenType.NCHAR, 129 TokenType.VARCHAR, 130 TokenType.NVARCHAR, 131 TokenType.TEXT, 132 TokenType.MEDIUMTEXT, 133 TokenType.LONGTEXT, 134 TokenType.MEDIUMBLOB, 135 TokenType.LONGBLOB, 136 TokenType.BINARY, 137 TokenType.VARBINARY, 138 TokenType.JSON, 139 TokenType.JSONB, 140 TokenType.INTERVAL, 141 TokenType.TIME, 142 TokenType.TIMESTAMP, 143 TokenType.TIMESTAMPTZ, 144 TokenType.TIMESTAMPLTZ, 145 TokenType.DATETIME, 146 TokenType.DATE, 147 TokenType.DECIMAL, 148 TokenType.UUID, 149 TokenType.GEOGRAPHY, 150 TokenType.GEOMETRY, 151 TokenType.HLLSKETCH, 152 TokenType.HSTORE, 153 TokenType.PSEUDO_TYPE, 154 TokenType.SUPER, 155 TokenType.SERIAL, 156 TokenType.SMALLSERIAL, 157 TokenType.BIGSERIAL, 158 TokenType.XML, 159 TokenType.UNIQUEIDENTIFIER, 160 TokenType.MONEY, 161 TokenType.SMALLMONEY, 162 TokenType.ROWVERSION, 163 TokenType.IMAGE, 164 TokenType.VARIANT, 165 TokenType.OBJECT, 166 TokenType.INET, 167 *NESTED_TYPE_TOKENS, 168 } 169 170 SUBQUERY_PREDICATES = { 171 TokenType.ANY: exp.Any, 172 TokenType.ALL: exp.All, 173 TokenType.EXISTS: exp.Exists, 174 TokenType.SOME: exp.Any, 175 } 176 177 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 178 179 DB_CREATABLES = { 180 TokenType.DATABASE, 181 TokenType.SCHEMA, 182 TokenType.TABLE, 183 TokenType.VIEW, 184 } 185 186 CREATABLES = { 187 TokenType.COLUMN, 188 TokenType.FUNCTION, 189 TokenType.INDEX, 190 TokenType.PROCEDURE, 191 *DB_CREATABLES, 192 } 193 194 ID_VAR_TOKENS = { 195 TokenType.VAR, 196 TokenType.ANTI, 197 TokenType.APPLY, 198 TokenType.AUTO_INCREMENT, 199 TokenType.BEGIN, 200 TokenType.BOTH, 201 TokenType.BUCKET, 202 TokenType.CACHE, 203 TokenType.CASCADE, 
204 TokenType.COLLATE, 205 TokenType.COMMAND, 206 TokenType.COMMENT, 207 TokenType.COMMIT, 208 TokenType.COMPOUND, 209 TokenType.CONSTRAINT, 210 TokenType.DEFAULT, 211 TokenType.DELETE, 212 TokenType.DESCRIBE, 213 TokenType.DIV, 214 TokenType.END, 215 TokenType.EXECUTE, 216 TokenType.ESCAPE, 217 TokenType.FALSE, 218 TokenType.FIRST, 219 TokenType.FILTER, 220 TokenType.FOLLOWING, 221 TokenType.FORMAT, 222 TokenType.FULL, 223 TokenType.IF, 224 TokenType.ISNULL, 225 TokenType.INTERVAL, 226 TokenType.LAZY, 227 TokenType.LEADING, 228 TokenType.LEFT, 229 TokenType.LOCAL, 230 TokenType.MATERIALIZED, 231 TokenType.MERGE, 232 TokenType.NATURAL, 233 TokenType.NEXT, 234 TokenType.OFFSET, 235 TokenType.ONLY, 236 TokenType.OPTIONS, 237 TokenType.ORDINALITY, 238 TokenType.PARTITION, 239 TokenType.PERCENT, 240 TokenType.PIVOT, 241 TokenType.PRAGMA, 242 TokenType.PRECEDING, 243 TokenType.RANGE, 244 TokenType.REFERENCES, 245 TokenType.RIGHT, 246 TokenType.ROW, 247 TokenType.ROWS, 248 TokenType.SEED, 249 TokenType.SEMI, 250 TokenType.SET, 251 TokenType.SHOW, 252 TokenType.SORTKEY, 253 TokenType.TEMPORARY, 254 TokenType.TOP, 255 TokenType.TRAILING, 256 TokenType.TRUE, 257 TokenType.UNBOUNDED, 258 TokenType.UNIQUE, 259 TokenType.UNLOGGED, 260 TokenType.UNPIVOT, 261 TokenType.VOLATILE, 262 TokenType.WINDOW, 263 *CREATABLES, 264 *SUBQUERY_PREDICATES, 265 *TYPE_TOKENS, 266 *NO_PAREN_FUNCTIONS, 267 } 268 269 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 270 TokenType.APPLY, 271 TokenType.FULL, 272 TokenType.LEFT, 273 TokenType.NATURAL, 274 TokenType.OFFSET, 275 TokenType.RIGHT, 276 TokenType.WINDOW, 277 } 278 279 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 280 281 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 282 283 FUNC_TOKENS = { 284 TokenType.COMMAND, 285 TokenType.CURRENT_DATE, 286 TokenType.CURRENT_DATETIME, 287 TokenType.CURRENT_TIMESTAMP, 288 TokenType.CURRENT_TIME, 289 TokenType.FILTER, 290 TokenType.FIRST, 291 TokenType.FORMAT, 292 TokenType.GLOB, 
293 TokenType.IDENTIFIER, 294 TokenType.INDEX, 295 TokenType.ISNULL, 296 TokenType.ILIKE, 297 TokenType.LIKE, 298 TokenType.MERGE, 299 TokenType.OFFSET, 300 TokenType.PRIMARY_KEY, 301 TokenType.REPLACE, 302 TokenType.ROW, 303 TokenType.UNNEST, 304 TokenType.VAR, 305 TokenType.LEFT, 306 TokenType.RIGHT, 307 TokenType.DATE, 308 TokenType.DATETIME, 309 TokenType.TABLE, 310 TokenType.TIMESTAMP, 311 TokenType.TIMESTAMPTZ, 312 TokenType.WINDOW, 313 *TYPE_TOKENS, 314 *SUBQUERY_PREDICATES, 315 } 316 317 CONJUNCTION = { 318 TokenType.AND: exp.And, 319 TokenType.OR: exp.Or, 320 } 321 322 EQUALITY = { 323 TokenType.EQ: exp.EQ, 324 TokenType.NEQ: exp.NEQ, 325 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 326 } 327 328 COMPARISON = { 329 TokenType.GT: exp.GT, 330 TokenType.GTE: exp.GTE, 331 TokenType.LT: exp.LT, 332 TokenType.LTE: exp.LTE, 333 } 334 335 BITWISE = { 336 TokenType.AMP: exp.BitwiseAnd, 337 TokenType.CARET: exp.BitwiseXor, 338 TokenType.PIPE: exp.BitwiseOr, 339 TokenType.DPIPE: exp.DPipe, 340 } 341 342 TERM = { 343 TokenType.DASH: exp.Sub, 344 TokenType.PLUS: exp.Add, 345 TokenType.MOD: exp.Mod, 346 TokenType.COLLATE: exp.Collate, 347 } 348 349 FACTOR = { 350 TokenType.DIV: exp.IntDiv, 351 TokenType.LR_ARROW: exp.Distance, 352 TokenType.SLASH: exp.Div, 353 TokenType.STAR: exp.Mul, 354 } 355 356 TIMESTAMPS = { 357 TokenType.TIME, 358 TokenType.TIMESTAMP, 359 TokenType.TIMESTAMPTZ, 360 TokenType.TIMESTAMPLTZ, 361 } 362 363 SET_OPERATIONS = { 364 TokenType.UNION, 365 TokenType.INTERSECT, 366 TokenType.EXCEPT, 367 } 368 369 JOIN_SIDES = { 370 TokenType.LEFT, 371 TokenType.RIGHT, 372 TokenType.FULL, 373 } 374 375 JOIN_KINDS = { 376 TokenType.INNER, 377 TokenType.OUTER, 378 TokenType.CROSS, 379 TokenType.SEMI, 380 TokenType.ANTI, 381 } 382 383 LAMBDAS = { 384 TokenType.ARROW: lambda self, expressions: self.expression( 385 exp.Lambda, 386 this=self._replace_lambda( 387 self._parse_conjunction(), 388 {node.name for node in expressions}, 389 ), 390 expressions=expressions, 391 ), 
392 TokenType.FARROW: lambda self, expressions: self.expression( 393 exp.Kwarg, 394 this=exp.Var(this=expressions[0].name), 395 expression=self._parse_conjunction(), 396 ), 397 } 398 399 COLUMN_OPERATORS = { 400 TokenType.DOT: None, 401 TokenType.DCOLON: lambda self, this, to: self.expression( 402 exp.Cast, 403 this=this, 404 to=to, 405 ), 406 TokenType.ARROW: lambda self, this, path: self.expression( 407 exp.JSONExtract, 408 this=this, 409 expression=path, 410 ), 411 TokenType.DARROW: lambda self, this, path: self.expression( 412 exp.JSONExtractScalar, 413 this=this, 414 expression=path, 415 ), 416 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 417 exp.JSONBExtract, 418 this=this, 419 expression=path, 420 ), 421 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 422 exp.JSONBExtractScalar, 423 this=this, 424 expression=path, 425 ), 426 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 427 exp.JSONBContains, 428 this=this, 429 expression=key, 430 ), 431 } 432 433 EXPRESSION_PARSERS = { 434 exp.Column: lambda self: self._parse_column(), 435 exp.DataType: lambda self: self._parse_types(), 436 exp.From: lambda self: self._parse_from(), 437 exp.Group: lambda self: self._parse_group(), 438 exp.Identifier: lambda self: self._parse_id_var(), 439 exp.Lateral: lambda self: self._parse_lateral(), 440 exp.Join: lambda self: self._parse_join(), 441 exp.Order: lambda self: self._parse_order(), 442 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 443 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 444 exp.Lambda: lambda self: self._parse_lambda(), 445 exp.Limit: lambda self: self._parse_limit(), 446 exp.Offset: lambda self: self._parse_offset(), 447 exp.TableAlias: lambda self: self._parse_table_alias(), 448 exp.Table: lambda self: self._parse_table(), 449 exp.Condition: lambda self: self._parse_conjunction(), 450 exp.Expression: lambda self: self._parse_statement(), 451 
exp.Properties: lambda self: self._parse_properties(), 452 exp.Where: lambda self: self._parse_where(), 453 exp.Ordered: lambda self: self._parse_ordered(), 454 exp.Having: lambda self: self._parse_having(), 455 exp.With: lambda self: self._parse_with(), 456 exp.Window: lambda self: self._parse_named_window(), 457 exp.Qualify: lambda self: self._parse_qualify(), 458 exp.Returning: lambda self: self._parse_returning(), 459 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 460 } 461 462 STATEMENT_PARSERS = { 463 TokenType.ALTER: lambda self: self._parse_alter(), 464 TokenType.BEGIN: lambda self: self._parse_transaction(), 465 TokenType.CACHE: lambda self: self._parse_cache(), 466 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 467 TokenType.COMMENT: lambda self: self._parse_comment(), 468 TokenType.CREATE: lambda self: self._parse_create(), 469 TokenType.DELETE: lambda self: self._parse_delete(), 470 TokenType.DESC: lambda self: self._parse_describe(), 471 TokenType.DESCRIBE: lambda self: self._parse_describe(), 472 TokenType.DROP: lambda self: self._parse_drop(), 473 TokenType.END: lambda self: self._parse_commit_or_rollback(), 474 TokenType.INSERT: lambda self: self._parse_insert(), 475 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 476 TokenType.MERGE: lambda self: self._parse_merge(), 477 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 478 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 479 TokenType.SET: lambda self: self._parse_set(), 480 TokenType.UNCACHE: lambda self: self._parse_uncache(), 481 TokenType.UPDATE: lambda self: self._parse_update(), 482 TokenType.USE: lambda self: self.expression( 483 exp.Use, 484 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 485 and exp.Var(this=self._prev.text), 486 this=self._parse_table(schema=False), 487 ), 488 } 489 490 UNARY_PARSERS = { 491 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + 
is handled as a no-op 492 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 493 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 494 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 495 } 496 497 PRIMARY_PARSERS = { 498 TokenType.STRING: lambda self, token: self.expression( 499 exp.Literal, this=token.text, is_string=True 500 ), 501 TokenType.NUMBER: lambda self, token: self.expression( 502 exp.Literal, this=token.text, is_string=False 503 ), 504 TokenType.STAR: lambda self, _: self.expression( 505 exp.Star, 506 **{"except": self._parse_except(), "replace": self._parse_replace()}, 507 ), 508 TokenType.NULL: lambda self, _: self.expression(exp.Null), 509 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 510 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 511 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 512 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 513 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 514 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 515 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 516 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 517 } 518 519 PLACEHOLDER_PARSERS = { 520 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 521 TokenType.PARAMETER: lambda self: self._parse_parameter(), 522 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 523 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 524 else None, 525 } 526 527 RANGE_PARSERS = { 528 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 529 TokenType.GLOB: binary_range_parser(exp.Glob), 530 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 531 
TokenType.IN: lambda self, this: self._parse_in(this), 532 TokenType.IS: lambda self, this: self._parse_is(this), 533 TokenType.LIKE: binary_range_parser(exp.Like), 534 TokenType.ILIKE: binary_range_parser(exp.ILike), 535 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 536 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 537 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 538 } 539 540 PROPERTY_PARSERS = { 541 "AFTER": lambda self: self._parse_afterjournal( 542 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 543 ), 544 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 545 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 546 "BEFORE": lambda self: self._parse_journal( 547 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 548 ), 549 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 550 "CHARACTER SET": lambda self: self._parse_character_set(), 551 "CHECKSUM": lambda self: self._parse_checksum(), 552 "CLUSTER BY": lambda self: self.expression( 553 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 554 ), 555 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 556 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 557 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 558 default=self._prev.text.upper() == "DEFAULT" 559 ), 560 "DEFINER": lambda self: self._parse_definer(), 561 "DETERMINISTIC": lambda self: self.expression( 562 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 563 ), 564 "DISTKEY": lambda self: self._parse_distkey(), 565 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 566 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 567 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 568 "FALLBACK": lambda self: 
self._parse_fallback(no=self._prev.text.upper() == "NO"), 569 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 570 "FREESPACE": lambda self: self._parse_freespace(), 571 "GLOBAL": lambda self: self._parse_temporary(global_=True), 572 "IMMUTABLE": lambda self: self.expression( 573 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 574 ), 575 "JOURNAL": lambda self: self._parse_journal( 576 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 577 ), 578 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 579 "LIKE": lambda self: self._parse_create_like(), 580 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 581 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 582 "LOCK": lambda self: self._parse_locking(), 583 "LOCKING": lambda self: self._parse_locking(), 584 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 585 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 586 "MAX": lambda self: self._parse_datablocksize(), 587 "MAXIMUM": lambda self: self._parse_datablocksize(), 588 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 589 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 590 ), 591 "MIN": lambda self: self._parse_datablocksize(), 592 "MINIMUM": lambda self: self._parse_datablocksize(), 593 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 594 "NO": lambda self: self._parse_noprimaryindex(), 595 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 596 "ON": lambda self: self._parse_oncommit(), 597 "PARTITION BY": lambda self: self._parse_partitioned_by(), 598 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 599 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 600 "RETURNS": lambda self: self._parse_returns(), 601 "ROW": lambda self: self._parse_row(), 
602 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 603 "SORTKEY": lambda self: self._parse_sortkey(), 604 "STABLE": lambda self: self.expression( 605 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 606 ), 607 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 608 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 609 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 610 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 611 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 612 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 613 "VOLATILE": lambda self: self.expression( 614 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 615 ), 616 "WITH": lambda self: self._parse_with_property(), 617 } 618 619 CONSTRAINT_PARSERS = { 620 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 621 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 622 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 623 "CHARACTER SET": lambda self: self.expression( 624 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 625 ), 626 "CHECK": lambda self: self.expression( 627 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 628 ), 629 "COLLATE": lambda self: self.expression( 630 exp.CollateColumnConstraint, this=self._parse_var() 631 ), 632 "COMMENT": lambda self: self.expression( 633 exp.CommentColumnConstraint, this=self._parse_string() 634 ), 635 "COMPRESS": lambda self: self._parse_compress(), 636 "DEFAULT": lambda self: self.expression( 637 exp.DefaultColumnConstraint, this=self._parse_bitwise() 638 ), 639 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 640 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 641 "FORMAT": lambda self: self.expression( 642 
exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 643 ), 644 "GENERATED": lambda self: self._parse_generated_as_identity(), 645 "IDENTITY": lambda self: self._parse_auto_increment(), 646 "INLINE": lambda self: self._parse_inline(), 647 "LIKE": lambda self: self._parse_create_like(), 648 "NOT": lambda self: self._parse_not_constraint(), 649 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 650 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 651 "PRIMARY KEY": lambda self: self._parse_primary_key(), 652 "TITLE": lambda self: self.expression( 653 exp.TitleColumnConstraint, this=self._parse_var_or_string() 654 ), 655 "UNIQUE": lambda self: self._parse_unique(), 656 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 657 } 658 659 ALTER_PARSERS = { 660 "ADD": lambda self: self._parse_alter_table_add(), 661 "ALTER": lambda self: self._parse_alter_table_alter(), 662 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 663 "DROP": lambda self: self._parse_alter_table_drop(), 664 "RENAME": lambda self: self._parse_alter_table_rename(), 665 } 666 667 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 668 669 NO_PAREN_FUNCTION_PARSERS = { 670 TokenType.CASE: lambda self: self._parse_case(), 671 TokenType.IF: lambda self: self._parse_if(), 672 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 673 } 674 675 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 676 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 677 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 678 "EXTRACT": lambda self: self._parse_extract(), 679 "JSON_OBJECT": lambda self: self._parse_json_object(), 680 "POSITION": lambda self: self._parse_position(), 681 "STRING_AGG": lambda self: self._parse_string_agg(), 682 "SUBSTRING": lambda self: self._parse_substring(), 683 "TRIM": lambda self: 
self._parse_trim(), 684 "TRY_CAST": lambda self: self._parse_cast(False), 685 "TRY_CONVERT": lambda self: self._parse_convert(False), 686 } 687 688 QUERY_MODIFIER_PARSERS = { 689 "match": lambda self: self._parse_match_recognize(), 690 "where": lambda self: self._parse_where(), 691 "group": lambda self: self._parse_group(), 692 "having": lambda self: self._parse_having(), 693 "qualify": lambda self: self._parse_qualify(), 694 "windows": lambda self: self._parse_window_clause(), 695 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 696 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 697 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 698 "order": lambda self: self._parse_order(), 699 "limit": lambda self: self._parse_limit(), 700 "offset": lambda self: self._parse_offset(), 701 "lock": lambda self: self._parse_lock(), 702 "sample": lambda self: self._parse_table_sample(as_modifier=True), 703 } 704 705 SET_PARSERS = { 706 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 707 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 708 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 709 "TRANSACTION": lambda self: self._parse_set_transaction(), 710 } 711 712 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 713 714 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 715 716 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 717 718 TRANSACTION_CHARACTERISTICS = { 719 "ISOLATION LEVEL REPEATABLE READ", 720 "ISOLATION LEVEL READ COMMITTED", 721 "ISOLATION LEVEL READ UNCOMMITTED", 722 "ISOLATION LEVEL SERIALIZABLE", 723 "READ WRITE", 724 "READ ONLY", 725 } 726 727 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 728 729 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 730 731 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 732 733 STRICT_CAST = True 734 735 
    # Whether CONVERT takes its type argument before the expression.
    # NOTE(review): consumed by dialect CONVERT parsing; usage not visible in this chunk.
    CONVERT_TYPE_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): falls back to IMMEDIATE here, while the class docstring
        # claims RAISE — the code is authoritative.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clear all per-parse state (SQL text, errors, token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if a type has no registered parser in EXPRESSION_PARSERS.
            ParseError: if the tokens parse into none of the given types; chains
                the last underlying ParseError and carries all of them merged.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type that was attempted.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split tokens on semicolons and run `parse_method` over each statement."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Semicolons delimit statements; a trailing one opens no new chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any token left unconsumed means the statement did not fully parse.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The ANSI escape codes underline the offending span in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Pending comments from the last consumed token are attached (and
        # cleared), but an explicit `comments` argument takes precedence.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the SQL text spanned by `start` through the end of `end`."""
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        """Compute the absolute character offset of `token` within self.sql.

        Scans the SQL from the beginning, counting lines/columns via the
        tokenizer's BREAK whitespace classification.
        """
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        """Move the token cursor forward and refresh _curr/_next/_prev caches."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewind (or fast-forward) the cursor to an absolute token index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Generic fallback: keyword (previous token) plus a string payload.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists =
self._parse_exists() if allow_exists else None 992 993 self._match(TokenType.ON) 994 995 kind = self._match_set(self.CREATABLES) and self._prev 996 997 if not kind: 998 return self._parse_as_command(start) 999 1000 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1001 this = self._parse_user_defined_function(kind=kind.token_type) 1002 elif kind.token_type == TokenType.TABLE: 1003 this = self._parse_table() 1004 elif kind.token_type == TokenType.COLUMN: 1005 this = self._parse_column() 1006 else: 1007 this = self._parse_id_var() 1008 1009 self._match(TokenType.IS) 1010 1011 return self.expression( 1012 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1013 ) 1014 1015 def _parse_statement(self) -> t.Optional[exp.Expression]: 1016 if self._curr is None: 1017 return None 1018 1019 if self._match_set(self.STATEMENT_PARSERS): 1020 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1021 1022 if self._match_set(Tokenizer.COMMANDS): 1023 return self._parse_command() 1024 1025 expression = self._parse_expression() 1026 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1027 1028 self._parse_query_modifiers(expression) 1029 return expression 1030 1031 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 1032 start = self._prev 1033 temporary = self._match(TokenType.TEMPORARY) 1034 materialized = self._match(TokenType.MATERIALIZED) 1035 kind = self._match_set(self.CREATABLES) and self._prev.text 1036 if not kind: 1037 if default_kind: 1038 kind = default_kind 1039 else: 1040 return self._parse_as_command(start) 1041 1042 return self.expression( 1043 exp.Drop, 1044 exists=self._parse_exists(), 1045 this=self._parse_table(schema=True), 1046 kind=kind, 1047 temporary=temporary, 1048 materialized=materialized, 1049 cascade=self._match(TokenType.CASCADE), 1050 ) 1051 1052 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1053 
return ( 1054 self._match(TokenType.IF) 1055 and (not not_ or self._match(TokenType.NOT)) 1056 and self._match(TokenType.EXISTS) 1057 ) 1058 1059 def _parse_create(self) -> t.Optional[exp.Expression]: 1060 start = self._prev 1061 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1062 TokenType.OR, TokenType.REPLACE 1063 ) 1064 unique = self._match(TokenType.UNIQUE) 1065 volatile = self._match(TokenType.VOLATILE) 1066 1067 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1068 self._match(TokenType.TABLE) 1069 1070 properties = None 1071 create_token = self._match_set(self.CREATABLES) and self._prev 1072 1073 if not create_token: 1074 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1075 create_token = self._match_set(self.CREATABLES) and self._prev 1076 1077 if not properties or not create_token: 1078 return self._parse_as_command(start) 1079 1080 exists = self._parse_exists(not_=True) 1081 this = None 1082 expression = None 1083 indexes = None 1084 no_schema_binding = None 1085 begin = None 1086 1087 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1088 this = self._parse_user_defined_function(kind=create_token.token_type) 1089 temp_properties = self._parse_properties() 1090 if properties and temp_properties: 1091 properties.expressions.extend(temp_properties.expressions) 1092 elif temp_properties: 1093 properties = temp_properties 1094 1095 self._match(TokenType.ALIAS) 1096 begin = self._match(TokenType.BEGIN) 1097 return_ = self._match_text_seq("RETURN") 1098 expression = self._parse_statement() 1099 1100 if return_: 1101 expression = self.expression(exp.Return, this=expression) 1102 elif create_token.token_type == TokenType.INDEX: 1103 this = self._parse_index() 1104 elif create_token.token_type in self.DB_CREATABLES: 1105 table_parts = self._parse_table_parts(schema=True) 1106 1107 # exp.Properties.Location.POST_NAME 1108 if self._match(TokenType.COMMA): 1109 
temp_properties = self._parse_properties(before=True) 1110 if properties and temp_properties: 1111 properties.expressions.extend(temp_properties.expressions) 1112 elif temp_properties: 1113 properties = temp_properties 1114 1115 this = self._parse_schema(this=table_parts) 1116 1117 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1118 temp_properties = self._parse_properties() 1119 if properties and temp_properties: 1120 properties.expressions.extend(temp_properties.expressions) 1121 elif temp_properties: 1122 properties = temp_properties 1123 1124 self._match(TokenType.ALIAS) 1125 1126 # exp.Properties.Location.POST_ALIAS 1127 if not ( 1128 self._match(TokenType.SELECT, advance=False) 1129 or self._match(TokenType.WITH, advance=False) 1130 or self._match(TokenType.L_PAREN, advance=False) 1131 ): 1132 temp_properties = self._parse_properties() 1133 if properties and temp_properties: 1134 properties.expressions.extend(temp_properties.expressions) 1135 elif temp_properties: 1136 properties = temp_properties 1137 1138 expression = self._parse_ddl_select() 1139 1140 if create_token.token_type == TokenType.TABLE: 1141 # exp.Properties.Location.POST_EXPRESSION 1142 temp_properties = self._parse_properties() 1143 if properties and temp_properties: 1144 properties.expressions.extend(temp_properties.expressions) 1145 elif temp_properties: 1146 properties = temp_properties 1147 1148 indexes = [] 1149 while True: 1150 index = self._parse_create_table_index() 1151 1152 # exp.Properties.Location.POST_INDEX 1153 if self._match(TokenType.PARTITION_BY, advance=False): 1154 temp_properties = self._parse_properties() 1155 if properties and temp_properties: 1156 properties.expressions.extend(temp_properties.expressions) 1157 elif temp_properties: 1158 properties = temp_properties 1159 1160 if not index: 1161 break 1162 else: 1163 indexes.append(index) 1164 elif create_token.token_type == TokenType.VIEW: 1165 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1166 
no_schema_binding = True 1167 1168 return self.expression( 1169 exp.Create, 1170 this=this, 1171 kind=create_token.text, 1172 replace=replace, 1173 unique=unique, 1174 volatile=volatile, 1175 expression=expression, 1176 exists=exists, 1177 properties=properties, 1178 indexes=indexes, 1179 no_schema_binding=no_schema_binding, 1180 begin=begin, 1181 ) 1182 1183 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1184 self._match(TokenType.COMMA) 1185 1186 # parsers look to _prev for no/dual/default, so need to consume first 1187 self._match_text_seq("NO") 1188 self._match_text_seq("DUAL") 1189 self._match_text_seq("DEFAULT") 1190 1191 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1192 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1193 1194 return None 1195 1196 def _parse_property(self) -> t.Optional[exp.Expression]: 1197 if self._match_texts(self.PROPERTY_PARSERS): 1198 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1199 1200 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1201 return self._parse_character_set(default=True) 1202 1203 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1204 return self._parse_sortkey(compound=True) 1205 1206 if self._match_text_seq("SQL", "SECURITY"): 1207 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1208 1209 assignment = self._match_pair( 1210 TokenType.VAR, TokenType.EQ, advance=False 1211 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1212 1213 if assignment: 1214 key = self._parse_var_or_string() 1215 self._match(TokenType.EQ) 1216 return self.expression(exp.Property, this=key, value=self._parse_column()) 1217 1218 return None 1219 1220 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1221 self._match(TokenType.EQ) 1222 self._match(TokenType.ALIAS) 1223 return self.expression( 1224 exp_class, 1225 this=self._parse_var_or_string() or 
self._parse_number() or self._parse_id_var(), 1226 ) 1227 1228 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1229 properties = [] 1230 1231 while True: 1232 if before: 1233 identified_property = self._parse_property_before() 1234 else: 1235 identified_property = self._parse_property() 1236 1237 if not identified_property: 1238 break 1239 for p in ensure_list(identified_property): 1240 properties.append(p) 1241 1242 if properties: 1243 return self.expression(exp.Properties, expressions=properties) 1244 1245 return None 1246 1247 def _parse_fallback(self, no=False) -> exp.Expression: 1248 self._match_text_seq("FALLBACK") 1249 return self.expression( 1250 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1251 ) 1252 1253 def _parse_with_property( 1254 self, 1255 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1256 self._match(TokenType.WITH) 1257 if self._match(TokenType.L_PAREN, advance=False): 1258 return self._parse_wrapped_csv(self._parse_property) 1259 1260 if self._match_text_seq("JOURNAL"): 1261 return self._parse_withjournaltable() 1262 1263 if self._match_text_seq("DATA"): 1264 return self._parse_withdata(no=False) 1265 elif self._match_text_seq("NO", "DATA"): 1266 return self._parse_withdata(no=True) 1267 1268 if not self._next: 1269 return None 1270 1271 return self._parse_withisolatedloading() 1272 1273 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1274 def _parse_definer(self) -> t.Optional[exp.Expression]: 1275 self._match(TokenType.EQ) 1276 1277 user = self._parse_id_var() 1278 self._match(TokenType.PARAMETER) 1279 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1280 1281 if not user or not host: 1282 return None 1283 1284 return exp.DefinerProperty(this=f"{user}@{host}") 1285 1286 def _parse_withjournaltable(self) -> exp.Expression: 1287 self._match(TokenType.TABLE) 1288 self._match(TokenType.EQ) 1289 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1290 1291 def _parse_log(self, no=False) -> exp.Expression: 1292 self._match_text_seq("LOG") 1293 return self.expression(exp.LogProperty, no=no) 1294 1295 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1296 before = self._match_text_seq("BEFORE") 1297 self._match_text_seq("JOURNAL") 1298 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1299 1300 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1301 self._match_text_seq("NOT") 1302 self._match_text_seq("LOCAL") 1303 self._match_text_seq("AFTER", "JOURNAL") 1304 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1305 1306 def _parse_checksum(self) -> exp.Expression: 1307 self._match_text_seq("CHECKSUM") 1308 self._match(TokenType.EQ) 1309 1310 on = None 1311 if self._match(TokenType.ON): 1312 on = True 1313 elif self._match_text_seq("OFF"): 1314 on = False 1315 default = self._match(TokenType.DEFAULT) 1316 1317 return self.expression( 1318 exp.ChecksumProperty, 1319 on=on, 1320 default=default, 1321 ) 1322 1323 def _parse_freespace(self) -> exp.Expression: 1324 self._match_text_seq("FREESPACE") 1325 self._match(TokenType.EQ) 1326 return self.expression( 1327 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1328 ) 1329 1330 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1331 self._match_text_seq("MERGEBLOCKRATIO") 1332 if self._match(TokenType.EQ): 1333 return self.expression( 1334 exp.MergeBlockRatioProperty, 1335 this=self._parse_number(), 1336 percent=self._match(TokenType.PERCENT), 1337 ) 1338 else: 1339 return self.expression( 1340 exp.MergeBlockRatioProperty, 1341 no=no, 1342 default=default, 1343 ) 1344 1345 def _parse_datablocksize(self, default=None) -> exp.Expression: 1346 if default: 1347 self._match_text_seq("DATABLOCKSIZE") 1348 return 
self.expression(exp.DataBlocksizeProperty, default=True) 1349 elif self._match_texts(("MIN", "MINIMUM")): 1350 self._match_text_seq("DATABLOCKSIZE") 1351 return self.expression(exp.DataBlocksizeProperty, min=True) 1352 elif self._match_texts(("MAX", "MAXIMUM")): 1353 self._match_text_seq("DATABLOCKSIZE") 1354 return self.expression(exp.DataBlocksizeProperty, min=False) 1355 1356 self._match_text_seq("DATABLOCKSIZE") 1357 self._match(TokenType.EQ) 1358 size = self._parse_number() 1359 units = None 1360 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1361 units = self._prev.text 1362 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1363 1364 def _parse_blockcompression(self) -> exp.Expression: 1365 self._match_text_seq("BLOCKCOMPRESSION") 1366 self._match(TokenType.EQ) 1367 always = self._match_text_seq("ALWAYS") 1368 manual = self._match_text_seq("MANUAL") 1369 never = self._match_text_seq("NEVER") 1370 default = self._match_text_seq("DEFAULT") 1371 autotemp = None 1372 if self._match_text_seq("AUTOTEMP"): 1373 autotemp = self._parse_schema() 1374 1375 return self.expression( 1376 exp.BlockCompressionProperty, 1377 always=always, 1378 manual=manual, 1379 never=never, 1380 default=default, 1381 autotemp=autotemp, 1382 ) 1383 1384 def _parse_withisolatedloading(self) -> exp.Expression: 1385 no = self._match_text_seq("NO") 1386 concurrent = self._match_text_seq("CONCURRENT") 1387 self._match_text_seq("ISOLATED", "LOADING") 1388 for_all = self._match_text_seq("FOR", "ALL") 1389 for_insert = self._match_text_seq("FOR", "INSERT") 1390 for_none = self._match_text_seq("FOR", "NONE") 1391 return self.expression( 1392 exp.IsolatedLoadingProperty, 1393 no=no, 1394 concurrent=concurrent, 1395 for_all=for_all, 1396 for_insert=for_insert, 1397 for_none=for_none, 1398 ) 1399 1400 def _parse_locking(self) -> exp.Expression: 1401 if self._match(TokenType.TABLE): 1402 kind = "TABLE" 1403 elif self._match(TokenType.VIEW): 1404 kind = "VIEW" 1405 elif 
self._match(TokenType.ROW): 1406 kind = "ROW" 1407 elif self._match_text_seq("DATABASE"): 1408 kind = "DATABASE" 1409 else: 1410 kind = None 1411 1412 if kind in ("DATABASE", "TABLE", "VIEW"): 1413 this = self._parse_table_parts() 1414 else: 1415 this = None 1416 1417 if self._match(TokenType.FOR): 1418 for_or_in = "FOR" 1419 elif self._match(TokenType.IN): 1420 for_or_in = "IN" 1421 else: 1422 for_or_in = None 1423 1424 if self._match_text_seq("ACCESS"): 1425 lock_type = "ACCESS" 1426 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1427 lock_type = "EXCLUSIVE" 1428 elif self._match_text_seq("SHARE"): 1429 lock_type = "SHARE" 1430 elif self._match_text_seq("READ"): 1431 lock_type = "READ" 1432 elif self._match_text_seq("WRITE"): 1433 lock_type = "WRITE" 1434 elif self._match_text_seq("CHECKSUM"): 1435 lock_type = "CHECKSUM" 1436 else: 1437 lock_type = None 1438 1439 override = self._match_text_seq("OVERRIDE") 1440 1441 return self.expression( 1442 exp.LockingProperty, 1443 this=this, 1444 kind=kind, 1445 for_or_in=for_or_in, 1446 lock_type=lock_type, 1447 override=override, 1448 ) 1449 1450 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1451 if self._match(TokenType.PARTITION_BY): 1452 return self._parse_csv(self._parse_conjunction) 1453 return [] 1454 1455 def _parse_partitioned_by(self) -> exp.Expression: 1456 self._match(TokenType.EQ) 1457 return self.expression( 1458 exp.PartitionedByProperty, 1459 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1460 ) 1461 1462 def _parse_withdata(self, no=False) -> exp.Expression: 1463 if self._match_text_seq("AND", "STATISTICS"): 1464 statistics = True 1465 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1466 statistics = False 1467 else: 1468 statistics = None 1469 1470 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1471 1472 def _parse_noprimaryindex(self) -> exp.Expression: 1473 self._match_text_seq("PRIMARY", "INDEX") 1474 return 
exp.NoPrimaryIndexProperty() 1475 1476 def _parse_oncommit(self) -> exp.Expression: 1477 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1478 return exp.OnCommitProperty() 1479 1480 def _parse_distkey(self) -> exp.Expression: 1481 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1482 1483 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1484 table = self._parse_table(schema=True) 1485 options = [] 1486 while self._match_texts(("INCLUDING", "EXCLUDING")): 1487 this = self._prev.text.upper() 1488 id_var = self._parse_id_var() 1489 1490 if not id_var: 1491 return None 1492 1493 options.append( 1494 self.expression( 1495 exp.Property, 1496 this=this, 1497 value=exp.Var(this=id_var.this.upper()), 1498 ) 1499 ) 1500 return self.expression(exp.LikeProperty, this=table, expressions=options) 1501 1502 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1503 return self.expression( 1504 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1505 ) 1506 1507 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1508 self._match(TokenType.EQ) 1509 return self.expression( 1510 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1511 ) 1512 1513 def _parse_returns(self) -> exp.Expression: 1514 value: t.Optional[exp.Expression] 1515 is_table = self._match(TokenType.TABLE) 1516 1517 if is_table: 1518 if self._match(TokenType.LT): 1519 value = self.expression( 1520 exp.Schema, 1521 this="TABLE", 1522 expressions=self._parse_csv(self._parse_struct_kwargs), 1523 ) 1524 if not self._match(TokenType.GT): 1525 self.raise_error("Expecting >") 1526 else: 1527 value = self._parse_schema(exp.Var(this="TABLE")) 1528 else: 1529 value = self._parse_types() 1530 1531 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1532 1533 def _parse_temporary(self, global_=False) -> exp.Expression: 1534 self._match(TokenType.TEMPORARY) # 
in case calling from "GLOBAL" 1535 return self.expression(exp.TemporaryProperty, global_=global_) 1536 1537 def _parse_describe(self) -> exp.Expression: 1538 kind = self._match_set(self.CREATABLES) and self._prev.text 1539 this = self._parse_table() 1540 1541 return self.expression(exp.Describe, this=this, kind=kind) 1542 1543 def _parse_insert(self) -> exp.Expression: 1544 overwrite = self._match(TokenType.OVERWRITE) 1545 local = self._match(TokenType.LOCAL) 1546 alternative = None 1547 1548 if self._match_text_seq("DIRECTORY"): 1549 this: t.Optional[exp.Expression] = self.expression( 1550 exp.Directory, 1551 this=self._parse_var_or_string(), 1552 local=local, 1553 row_format=self._parse_row_format(match_row=True), 1554 ) 1555 else: 1556 if self._match(TokenType.OR): 1557 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1558 1559 self._match(TokenType.INTO) 1560 self._match(TokenType.TABLE) 1561 this = self._parse_table(schema=True) 1562 1563 return self.expression( 1564 exp.Insert, 1565 this=this, 1566 exists=self._parse_exists(), 1567 partition=self._parse_partition(), 1568 expression=self._parse_ddl_select(), 1569 returning=self._parse_returning(), 1570 overwrite=overwrite, 1571 alternative=alternative, 1572 ) 1573 1574 def _parse_returning(self) -> t.Optional[exp.Expression]: 1575 if not self._match(TokenType.RETURNING): 1576 return None 1577 1578 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1579 1580 def _parse_row(self) -> t.Optional[exp.Expression]: 1581 if not self._match(TokenType.FORMAT): 1582 return None 1583 return self._parse_row_format() 1584 1585 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1586 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1587 return None 1588 1589 if self._match_text_seq("SERDE"): 1590 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1591 1592 
self._match_text_seq("DELIMITED") 1593 1594 kwargs = {} 1595 1596 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1597 kwargs["fields"] = self._parse_string() 1598 if self._match_text_seq("ESCAPED", "BY"): 1599 kwargs["escaped"] = self._parse_string() 1600 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1601 kwargs["collection_items"] = self._parse_string() 1602 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1603 kwargs["map_keys"] = self._parse_string() 1604 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1605 kwargs["lines"] = self._parse_string() 1606 if self._match_text_seq("NULL", "DEFINED", "AS"): 1607 kwargs["null"] = self._parse_string() 1608 1609 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1610 1611 def _parse_load_data(self) -> exp.Expression: 1612 local = self._match(TokenType.LOCAL) 1613 self._match_text_seq("INPATH") 1614 inpath = self._parse_string() 1615 overwrite = self._match(TokenType.OVERWRITE) 1616 self._match_pair(TokenType.INTO, TokenType.TABLE) 1617 1618 return self.expression( 1619 exp.LoadData, 1620 this=self._parse_table(schema=True), 1621 local=local, 1622 overwrite=overwrite, 1623 inpath=inpath, 1624 partition=self._parse_partition(), 1625 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1626 serde=self._match_text_seq("SERDE") and self._parse_string(), 1627 ) 1628 1629 def _parse_delete(self) -> exp.Expression: 1630 self._match(TokenType.FROM) 1631 1632 return self.expression( 1633 exp.Delete, 1634 this=self._parse_table(schema=True), 1635 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1636 where=self._parse_where(), 1637 returning=self._parse_returning(), 1638 ) 1639 1640 def _parse_update(self) -> exp.Expression: 1641 return self.expression( 1642 exp.Update, 1643 **{ # type: ignore 1644 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1645 "expressions": 
self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1646 "from": self._parse_from(), 1647 "where": self._parse_where(), 1648 "returning": self._parse_returning(), 1649 }, 1650 ) 1651 1652 def _parse_uncache(self) -> exp.Expression: 1653 if not self._match(TokenType.TABLE): 1654 self.raise_error("Expecting TABLE after UNCACHE") 1655 1656 return self.expression( 1657 exp.Uncache, 1658 exists=self._parse_exists(), 1659 this=self._parse_table(schema=True), 1660 ) 1661 1662 def _parse_cache(self) -> exp.Expression: 1663 lazy = self._match(TokenType.LAZY) 1664 self._match(TokenType.TABLE) 1665 table = self._parse_table(schema=True) 1666 options = [] 1667 1668 if self._match(TokenType.OPTIONS): 1669 self._match_l_paren() 1670 k = self._parse_string() 1671 self._match(TokenType.EQ) 1672 v = self._parse_string() 1673 options = [k, v] 1674 self._match_r_paren() 1675 1676 self._match(TokenType.ALIAS) 1677 return self.expression( 1678 exp.Cache, 1679 this=table, 1680 lazy=lazy, 1681 options=options, 1682 expression=self._parse_select(nested=True), 1683 ) 1684 1685 def _parse_partition(self) -> t.Optional[exp.Expression]: 1686 if not self._match(TokenType.PARTITION): 1687 return None 1688 1689 return self.expression( 1690 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1691 ) 1692 1693 def _parse_value(self) -> exp.Expression: 1694 if self._match(TokenType.L_PAREN): 1695 expressions = self._parse_csv(self._parse_conjunction) 1696 self._match_r_paren() 1697 return self.expression(exp.Tuple, expressions=expressions) 1698 1699 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1700 # Source: https://prestodb.io/docs/current/sql/values.html 1701 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1702 1703 def _parse_select( 1704 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1705 ) -> t.Optional[exp.Expression]: 1706 cte = self._parse_with() 1707 if cte: 1708 this = self._parse_statement() 1709 1710 if not this: 1711 self.raise_error("Failed to parse any statement following CTE") 1712 return cte 1713 1714 if "with" in this.arg_types: 1715 this.set("with", cte) 1716 else: 1717 self.raise_error(f"{this.key} does not support CTE") 1718 this = cte 1719 elif self._match(TokenType.SELECT): 1720 comments = self._prev_comments 1721 1722 kind = ( 1723 self._match(TokenType.ALIAS) 1724 and self._match_texts(("STRUCT", "VALUE")) 1725 and self._prev.text 1726 ) 1727 hint = self._parse_hint() 1728 all_ = self._match(TokenType.ALL) 1729 distinct = self._match(TokenType.DISTINCT) 1730 1731 if distinct: 1732 distinct = self.expression( 1733 exp.Distinct, 1734 on=self._parse_value() if self._match(TokenType.ON) else None, 1735 ) 1736 1737 if all_ and distinct: 1738 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1739 1740 limit = self._parse_limit(top=True) 1741 expressions = self._parse_csv(self._parse_expression) 1742 1743 this = self.expression( 1744 exp.Select, 1745 kind=kind, 1746 hint=hint, 1747 distinct=distinct, 1748 expressions=expressions, 1749 limit=limit, 1750 ) 1751 this.comments = comments 1752 1753 into = self._parse_into() 1754 if into: 1755 this.set("into", into) 1756 1757 from_ = self._parse_from() 1758 if from_: 1759 this.set("from", from_) 1760 1761 self._parse_query_modifiers(this) 1762 elif (table or nested) and self._match(TokenType.L_PAREN): 1763 this = self._parse_table() if table else self._parse_select(nested=True) 1764 self._parse_query_modifiers(this) 1765 this = self._parse_set_operations(this) 1766 self._match_r_paren() 1767 1768 # early return 
so that subquery unions aren't parsed again 1769 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1770 # Union ALL should be a property of the top select node, not the subquery 1771 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1772 elif self._match(TokenType.VALUES): 1773 this = self.expression( 1774 exp.Values, 1775 expressions=self._parse_csv(self._parse_value), 1776 alias=self._parse_table_alias(), 1777 ) 1778 else: 1779 this = None 1780 1781 return self._parse_set_operations(this) 1782 1783 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1784 if not skip_with_token and not self._match(TokenType.WITH): 1785 return None 1786 1787 recursive = self._match(TokenType.RECURSIVE) 1788 1789 expressions = [] 1790 while True: 1791 expressions.append(self._parse_cte()) 1792 1793 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1794 break 1795 else: 1796 self._match(TokenType.WITH) 1797 1798 return self.expression(exp.With, expressions=expressions, recursive=recursive) 1799 1800 def _parse_cte(self) -> exp.Expression: 1801 alias = self._parse_table_alias() 1802 if not alias or not alias.this: 1803 self.raise_error("Expected CTE to have alias") 1804 1805 self._match(TokenType.ALIAS) 1806 1807 return self.expression( 1808 exp.CTE, 1809 this=self._parse_wrapped(self._parse_statement), 1810 alias=alias, 1811 ) 1812 1813 def _parse_table_alias( 1814 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1815 ) -> t.Optional[exp.Expression]: 1816 any_token = self._match(TokenType.ALIAS) 1817 alias = ( 1818 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1819 or self._parse_string_as_identifier() 1820 ) 1821 1822 index = self._index 1823 if self._match(TokenType.L_PAREN): 1824 columns = self._parse_csv(self._parse_function_parameter) 1825 self._match_r_paren() if columns else self._retreat(index) 1826 else: 1827 columns = None 1828 1829 if not alias and 
not columns: 1830 return None 1831 1832 return self.expression(exp.TableAlias, this=alias, columns=columns) 1833 1834 def _parse_subquery( 1835 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1836 ) -> exp.Expression: 1837 return self.expression( 1838 exp.Subquery, 1839 this=this, 1840 pivots=self._parse_pivots(), 1841 alias=self._parse_table_alias() if parse_alias else None, 1842 ) 1843 1844 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1845 if not isinstance(this, self.MODIFIABLES): 1846 return 1847 1848 table = isinstance(this, exp.Table) 1849 1850 while True: 1851 lateral = self._parse_lateral() 1852 join = self._parse_join() 1853 comma = None if table else self._match(TokenType.COMMA) 1854 if lateral: 1855 this.append("laterals", lateral) 1856 if join: 1857 this.append("joins", join) 1858 if comma: 1859 this.args["from"].append("expressions", self._parse_table()) 1860 if not (lateral or join or comma): 1861 break 1862 1863 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1864 expression = parser(self) 1865 1866 if expression: 1867 this.set(key, expression) 1868 1869 def _parse_hint(self) -> t.Optional[exp.Expression]: 1870 if self._match(TokenType.HINT): 1871 hints = self._parse_csv(self._parse_function) 1872 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1873 self.raise_error("Expected */ after HINT") 1874 return self.expression(exp.Hint, expressions=hints) 1875 1876 return None 1877 1878 def _parse_into(self) -> t.Optional[exp.Expression]: 1879 if not self._match(TokenType.INTO): 1880 return None 1881 1882 temp = self._match(TokenType.TEMPORARY) 1883 unlogged = self._match(TokenType.UNLOGGED) 1884 self._match(TokenType.TABLE) 1885 1886 return self.expression( 1887 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1888 ) 1889 1890 def _parse_from(self) -> t.Optional[exp.Expression]: 1891 if not self._match(TokenType.FROM): 1892 return None 1893 1894 return 
self.expression( 1895 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1896 ) 1897 1898 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1899 if not self._match(TokenType.MATCH_RECOGNIZE): 1900 return None 1901 self._match_l_paren() 1902 1903 partition = self._parse_partition_by() 1904 order = self._parse_order() 1905 measures = ( 1906 self._parse_alias(self._parse_conjunction()) 1907 if self._match_text_seq("MEASURES") 1908 else None 1909 ) 1910 1911 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1912 rows = exp.Var(this="ONE ROW PER MATCH") 1913 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1914 text = "ALL ROWS PER MATCH" 1915 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1916 text += f" SHOW EMPTY MATCHES" 1917 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1918 text += f" OMIT EMPTY MATCHES" 1919 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1920 text += f" WITH UNMATCHED ROWS" 1921 rows = exp.Var(this=text) 1922 else: 1923 rows = None 1924 1925 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1926 text = "AFTER MATCH SKIP" 1927 if self._match_text_seq("PAST", "LAST", "ROW"): 1928 text += f" PAST LAST ROW" 1929 elif self._match_text_seq("TO", "NEXT", "ROW"): 1930 text += f" TO NEXT ROW" 1931 elif self._match_text_seq("TO", "FIRST"): 1932 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1933 elif self._match_text_seq("TO", "LAST"): 1934 text += f" TO LAST {self._advance_any().text}" # type: ignore 1935 after = exp.Var(this=text) 1936 else: 1937 after = None 1938 1939 if self._match_text_seq("PATTERN"): 1940 self._match_l_paren() 1941 1942 if not self._curr: 1943 self.raise_error("Expecting )", self._curr) 1944 1945 paren = 1 1946 start = self._curr 1947 1948 while self._curr and paren > 0: 1949 if self._curr.token_type == TokenType.L_PAREN: 1950 paren += 1 1951 if self._curr.token_type == TokenType.R_PAREN: 1952 paren -= 1 1953 end = self._prev 1954 
self._advance() 1955 if paren > 0: 1956 self.raise_error("Expecting )", self._curr) 1957 pattern = exp.Var(this=self._find_sql(start, end)) 1958 else: 1959 pattern = None 1960 1961 define = ( 1962 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1963 ) 1964 self._match_r_paren() 1965 1966 return self.expression( 1967 exp.MatchRecognize, 1968 partition_by=partition, 1969 order=order, 1970 measures=measures, 1971 rows=rows, 1972 after=after, 1973 pattern=pattern, 1974 define=define, 1975 ) 1976 1977 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1978 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1979 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1980 1981 if outer_apply or cross_apply: 1982 this = self._parse_select(table=True) 1983 view = None 1984 outer = not cross_apply 1985 elif self._match(TokenType.LATERAL): 1986 this = self._parse_select(table=True) 1987 view = self._match(TokenType.VIEW) 1988 outer = self._match(TokenType.OUTER) 1989 else: 1990 return None 1991 1992 if not this: 1993 this = self._parse_function() or self._parse_id_var(any_token=False) 1994 while self._match(TokenType.DOT): 1995 this = exp.Dot( 1996 this=this, 1997 expression=self._parse_function() or self._parse_id_var(any_token=False), 1998 ) 1999 2000 table_alias: t.Optional[exp.Expression] 2001 2002 if view: 2003 table = self._parse_id_var(any_token=False) 2004 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2005 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2006 else: 2007 table_alias = self._parse_table_alias() 2008 2009 expression = self.expression( 2010 exp.Lateral, 2011 this=this, 2012 view=view, 2013 outer=outer, 2014 alias=table_alias, 2015 ) 2016 2017 if outer_apply or cross_apply: 2018 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 2019 2020 return expression 2021 2022 def 
_parse_join_side_and_kind( 2023 self, 2024 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2025 return ( 2026 self._match(TokenType.NATURAL) and self._prev, 2027 self._match_set(self.JOIN_SIDES) and self._prev, 2028 self._match_set(self.JOIN_KINDS) and self._prev, 2029 ) 2030 2031 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2032 natural, side, kind = self._parse_join_side_and_kind() 2033 2034 if not skip_join_token and not self._match(TokenType.JOIN): 2035 return None 2036 2037 kwargs: t.Dict[ 2038 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2039 ] = {"this": self._parse_table()} 2040 2041 if natural: 2042 kwargs["natural"] = True 2043 if side: 2044 kwargs["side"] = side.text 2045 if kind: 2046 kwargs["kind"] = kind.text 2047 2048 if self._match(TokenType.ON): 2049 kwargs["on"] = self._parse_conjunction() 2050 elif self._match(TokenType.USING): 2051 kwargs["using"] = self._parse_wrapped_id_vars() 2052 2053 return self.expression(exp.Join, **kwargs) # type: ignore 2054 2055 def _parse_index(self) -> exp.Expression: 2056 index = self._parse_id_var() 2057 self._match(TokenType.ON) 2058 self._match(TokenType.TABLE) # hive 2059 2060 return self.expression( 2061 exp.Index, 2062 this=index, 2063 table=self.expression(exp.Table, this=self._parse_id_var()), 2064 columns=self._parse_expression(), 2065 ) 2066 2067 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2068 unique = self._match(TokenType.UNIQUE) 2069 primary = self._match_text_seq("PRIMARY") 2070 amp = self._match_text_seq("AMP") 2071 if not self._match(TokenType.INDEX): 2072 return None 2073 index = self._parse_id_var() 2074 columns = None 2075 if self._match(TokenType.L_PAREN, advance=False): 2076 columns = self._parse_wrapped_csv(self._parse_column) 2077 return self.expression( 2078 exp.Index, 2079 this=index, 2080 columns=columns, 2081 unique=unique, 2082 primary=primary, 2083 amp=amp, 2084 ) 
2085 2086 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2087 catalog = None 2088 db = None 2089 2090 table = ( 2091 (not schema and self._parse_function()) 2092 or self._parse_id_var(any_token=False) 2093 or self._parse_string_as_identifier() 2094 ) 2095 2096 while self._match(TokenType.DOT): 2097 if catalog: 2098 # This allows nesting the table in arbitrarily many dot expressions if needed 2099 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2100 else: 2101 catalog = db 2102 db = table 2103 table = self._parse_id_var() 2104 2105 if not table: 2106 self.raise_error(f"Expected table name but got {self._curr}") 2107 2108 return self.expression( 2109 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2110 ) 2111 2112 def _parse_table( 2113 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2114 ) -> t.Optional[exp.Expression]: 2115 lateral = self._parse_lateral() 2116 2117 if lateral: 2118 return lateral 2119 2120 unnest = self._parse_unnest() 2121 2122 if unnest: 2123 return unnest 2124 2125 values = self._parse_derived_table_values() 2126 2127 if values: 2128 return values 2129 2130 subquery = self._parse_select(table=True) 2131 2132 if subquery: 2133 if not subquery.args.get("pivots"): 2134 subquery.set("pivots", self._parse_pivots()) 2135 return subquery 2136 2137 this = self._parse_table_parts(schema=schema) 2138 2139 if schema: 2140 return self._parse_schema(this=this) 2141 2142 if self.alias_post_tablesample: 2143 table_sample = self._parse_table_sample() 2144 2145 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2146 2147 if alias: 2148 this.set("alias", alias) 2149 2150 if not this.args.get("pivots"): 2151 this.set("pivots", self._parse_pivots()) 2152 2153 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2154 this.set( 2155 "hints", 2156 self._parse_csv(lambda: self._parse_function() or 
self._parse_var(any_token=True)), 2157 ) 2158 self._match_r_paren() 2159 2160 if not self.alias_post_tablesample: 2161 table_sample = self._parse_table_sample() 2162 2163 if table_sample: 2164 table_sample.set("this", this) 2165 this = table_sample 2166 2167 return this 2168 2169 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2170 if not self._match(TokenType.UNNEST): 2171 return None 2172 2173 expressions = self._parse_wrapped_csv(self._parse_column) 2174 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2175 alias = self._parse_table_alias() 2176 2177 if alias and self.unnest_column_only: 2178 if alias.args.get("columns"): 2179 self.raise_error("Unexpected extra column alias in unnest.") 2180 alias.set("columns", [alias.this]) 2181 alias.set("this", None) 2182 2183 offset = None 2184 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2185 self._match(TokenType.ALIAS) 2186 offset = self._parse_conjunction() 2187 2188 return self.expression( 2189 exp.Unnest, 2190 expressions=expressions, 2191 ordinality=ordinality, 2192 alias=alias, 2193 offset=offset, 2194 ) 2195 2196 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2197 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2198 if not is_derived and not self._match(TokenType.VALUES): 2199 return None 2200 2201 expressions = self._parse_csv(self._parse_value) 2202 2203 if is_derived: 2204 self._match_r_paren() 2205 2206 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2207 2208 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2209 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2210 as_modifier and self._match_text_seq("USING", "SAMPLE") 2211 ): 2212 return None 2213 2214 bucket_numerator = None 2215 bucket_denominator = None 2216 bucket_field = None 2217 percent = None 2218 rows = None 2219 size = None 2220 seed = None 2221 2222 kind = 
"TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2223 method = self._parse_var(tokens=(TokenType.ROW,)) 2224 2225 self._match(TokenType.L_PAREN) 2226 2227 num = self._parse_number() 2228 2229 if self._match(TokenType.BUCKET): 2230 bucket_numerator = self._parse_number() 2231 self._match(TokenType.OUT_OF) 2232 bucket_denominator = bucket_denominator = self._parse_number() 2233 self._match(TokenType.ON) 2234 bucket_field = self._parse_field() 2235 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2236 percent = num 2237 elif self._match(TokenType.ROWS): 2238 rows = num 2239 else: 2240 size = num 2241 2242 self._match(TokenType.R_PAREN) 2243 2244 if self._match(TokenType.L_PAREN): 2245 method = self._parse_var() 2246 seed = self._match(TokenType.COMMA) and self._parse_number() 2247 self._match_r_paren() 2248 elif self._match_texts(("SEED", "REPEATABLE")): 2249 seed = self._parse_wrapped(self._parse_number) 2250 2251 return self.expression( 2252 exp.TableSample, 2253 method=method, 2254 bucket_numerator=bucket_numerator, 2255 bucket_denominator=bucket_denominator, 2256 bucket_field=bucket_field, 2257 percent=percent, 2258 rows=rows, 2259 size=size, 2260 seed=seed, 2261 kind=kind, 2262 ) 2263 2264 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2265 return list(iter(self._parse_pivot, None)) 2266 2267 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2268 index = self._index 2269 2270 if self._match(TokenType.PIVOT): 2271 unpivot = False 2272 elif self._match(TokenType.UNPIVOT): 2273 unpivot = True 2274 else: 2275 return None 2276 2277 expressions = [] 2278 field = None 2279 2280 if not self._match(TokenType.L_PAREN): 2281 self._retreat(index) 2282 return None 2283 2284 if unpivot: 2285 expressions = self._parse_csv(self._parse_column) 2286 else: 2287 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2288 2289 if not self._match(TokenType.FOR): 2290 self.raise_error("Expecting 
FOR") 2291 2292 value = self._parse_column() 2293 2294 if not self._match(TokenType.IN): 2295 self.raise_error("Expecting IN") 2296 2297 field = self._parse_in(value) 2298 2299 self._match_r_paren() 2300 2301 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2302 2303 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2304 pivot.set("alias", self._parse_table_alias()) 2305 2306 return pivot 2307 2308 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2309 if not skip_where_token and not self._match(TokenType.WHERE): 2310 return None 2311 2312 return self.expression( 2313 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2314 ) 2315 2316 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2317 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2318 return None 2319 2320 elements = defaultdict(list) 2321 2322 while True: 2323 expressions = self._parse_csv(self._parse_conjunction) 2324 if expressions: 2325 elements["expressions"].extend(expressions) 2326 2327 grouping_sets = self._parse_grouping_sets() 2328 if grouping_sets: 2329 elements["grouping_sets"].extend(grouping_sets) 2330 2331 rollup = None 2332 cube = None 2333 2334 with_ = self._match(TokenType.WITH) 2335 if self._match(TokenType.ROLLUP): 2336 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2337 elements["rollup"].extend(ensure_list(rollup)) 2338 2339 if self._match(TokenType.CUBE): 2340 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2341 elements["cube"].extend(ensure_list(cube)) 2342 2343 if not (expressions or grouping_sets or rollup or cube): 2344 break 2345 2346 return self.expression(exp.Group, **elements) # type: ignore 2347 2348 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2349 if not self._match(TokenType.GROUPING_SETS): 2350 return None 2351 2352 return 
self._parse_wrapped_csv(self._parse_grouping_set) 2353 2354 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2355 if self._match(TokenType.L_PAREN): 2356 grouping_set = self._parse_csv(self._parse_column) 2357 self._match_r_paren() 2358 return self.expression(exp.Tuple, expressions=grouping_set) 2359 2360 return self._parse_column() 2361 2362 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2363 if not skip_having_token and not self._match(TokenType.HAVING): 2364 return None 2365 return self.expression(exp.Having, this=self._parse_conjunction()) 2366 2367 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2368 if not self._match(TokenType.QUALIFY): 2369 return None 2370 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2371 2372 def _parse_order( 2373 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2374 ) -> t.Optional[exp.Expression]: 2375 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2376 return this 2377 2378 return self.expression( 2379 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2380 ) 2381 2382 def _parse_sort( 2383 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2384 ) -> t.Optional[exp.Expression]: 2385 if not self._match(token_type): 2386 return None 2387 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2388 2389 def _parse_ordered(self) -> exp.Expression: 2390 this = self._parse_conjunction() 2391 self._match(TokenType.ASC) 2392 is_desc = self._match(TokenType.DESC) 2393 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2394 is_nulls_last = self._match(TokenType.NULLS_LAST) 2395 desc = is_desc or False 2396 asc = not desc 2397 nulls_first = is_nulls_first or False 2398 explicitly_null_ordered = is_nulls_first or is_nulls_last 2399 if ( 2400 not explicitly_null_ordered 2401 and ( 2402 (asc and self.null_ordering == "nulls_are_small") 2403 or (desc and 
self.null_ordering != "nulls_are_small") 2404 ) 2405 and self.null_ordering != "nulls_are_last" 2406 ): 2407 nulls_first = True 2408 2409 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2410 2411 def _parse_limit( 2412 self, this: t.Optional[exp.Expression] = None, top: bool = False 2413 ) -> t.Optional[exp.Expression]: 2414 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2415 limit_paren = self._match(TokenType.L_PAREN) 2416 limit_exp = self.expression( 2417 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2418 ) 2419 2420 if limit_paren: 2421 self._match_r_paren() 2422 2423 return limit_exp 2424 2425 if self._match(TokenType.FETCH): 2426 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2427 direction = self._prev.text if direction else "FIRST" 2428 count = self._parse_number() 2429 self._match_set((TokenType.ROW, TokenType.ROWS)) 2430 self._match(TokenType.ONLY) 2431 return self.expression(exp.Fetch, direction=direction, count=count) 2432 2433 return this 2434 2435 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2436 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2437 return this 2438 2439 count = self._parse_number() 2440 self._match_set((TokenType.ROW, TokenType.ROWS)) 2441 return self.expression(exp.Offset, this=this, expression=count) 2442 2443 def _parse_lock(self) -> t.Optional[exp.Expression]: 2444 if self._match_text_seq("FOR", "UPDATE"): 2445 return self.expression(exp.Lock, update=True) 2446 if self._match_text_seq("FOR", "SHARE"): 2447 return self.expression(exp.Lock, update=False) 2448 2449 return None 2450 2451 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2452 if not self._match_set(self.SET_OPERATIONS): 2453 return this 2454 2455 token_type = self._prev.token_type 2456 2457 if token_type == TokenType.UNION: 2458 expression = exp.Union 2459 
elif token_type == TokenType.EXCEPT: 2460 expression = exp.Except 2461 else: 2462 expression = exp.Intersect 2463 2464 return self.expression( 2465 expression, 2466 this=this, 2467 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2468 expression=self._parse_set_operations(self._parse_select(nested=True)), 2469 ) 2470 2471 def _parse_expression(self) -> t.Optional[exp.Expression]: 2472 return self._parse_alias(self._parse_conjunction()) 2473 2474 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2475 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2476 2477 def _parse_equality(self) -> t.Optional[exp.Expression]: 2478 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2479 2480 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2481 return self._parse_tokens(self._parse_range, self.COMPARISON) 2482 2483 def _parse_range(self) -> t.Optional[exp.Expression]: 2484 this = self._parse_bitwise() 2485 negate = self._match(TokenType.NOT) 2486 2487 if self._match_set(self.RANGE_PARSERS): 2488 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2489 elif self._match(TokenType.ISNULL): 2490 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2491 2492 # Postgres supports ISNULL and NOTNULL for conditions. 
2493 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2494 if self._match(TokenType.NOTNULL): 2495 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2496 this = self.expression(exp.Not, this=this) 2497 2498 if negate: 2499 this = self.expression(exp.Not, this=this) 2500 2501 if self._match(TokenType.IS): 2502 this = self._parse_is(this) 2503 2504 return this 2505 2506 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2507 negate = self._match(TokenType.NOT) 2508 if self._match(TokenType.DISTINCT_FROM): 2509 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2510 return self.expression(klass, this=this, expression=self._parse_expression()) 2511 2512 this = self.expression( 2513 exp.Is, 2514 this=this, 2515 expression=self._parse_null() or self._parse_boolean(), 2516 ) 2517 return self.expression(exp.Not, this=this) if negate else this 2518 2519 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2520 unnest = self._parse_unnest() 2521 if unnest: 2522 this = self.expression(exp.In, this=this, unnest=unnest) 2523 elif self._match(TokenType.L_PAREN): 2524 expressions = self._parse_csv(self._parse_select_or_expression) 2525 2526 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2527 this = self.expression(exp.In, this=this, query=expressions[0]) 2528 else: 2529 this = self.expression(exp.In, this=this, expressions=expressions) 2530 2531 self._match_r_paren() 2532 else: 2533 this = self.expression(exp.In, this=this, field=self._parse_field()) 2534 2535 return this 2536 2537 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2538 low = self._parse_bitwise() 2539 self._match(TokenType.AND) 2540 high = self._parse_bitwise() 2541 return self.expression(exp.Between, this=this, low=low, high=high) 2542 2543 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2544 if not self._match(TokenType.ESCAPE): 2545 return this 2546 
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2547 2548 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2549 this = self._parse_term() 2550 2551 while True: 2552 if self._match_set(self.BITWISE): 2553 this = self.expression( 2554 self.BITWISE[self._prev.token_type], 2555 this=this, 2556 expression=self._parse_term(), 2557 ) 2558 elif self._match_pair(TokenType.LT, TokenType.LT): 2559 this = self.expression( 2560 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2561 ) 2562 elif self._match_pair(TokenType.GT, TokenType.GT): 2563 this = self.expression( 2564 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2565 ) 2566 else: 2567 break 2568 2569 return this 2570 2571 def _parse_term(self) -> t.Optional[exp.Expression]: 2572 return self._parse_tokens(self._parse_factor, self.TERM) 2573 2574 def _parse_factor(self) -> t.Optional[exp.Expression]: 2575 return self._parse_tokens(self._parse_unary, self.FACTOR) 2576 2577 def _parse_unary(self) -> t.Optional[exp.Expression]: 2578 if self._match_set(self.UNARY_PARSERS): 2579 return self.UNARY_PARSERS[self._prev.token_type](self) 2580 return self._parse_at_time_zone(self._parse_type()) 2581 2582 def _parse_type(self) -> t.Optional[exp.Expression]: 2583 if self._match(TokenType.INTERVAL): 2584 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field()) 2585 2586 index = self._index 2587 type_token = self._parse_types(check_func=True) 2588 this = self._parse_column() 2589 2590 if type_token: 2591 if isinstance(this, exp.Literal): 2592 return self.expression(exp.Cast, this=this, to=type_token) 2593 if not type_token.args.get("expressions"): 2594 self._retreat(index) 2595 return self._parse_column() 2596 return type_token 2597 2598 return this 2599 2600 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2601 index = self._index 2602 2603 prefix = self._match_text_seq("SYSUDTLIB", ".") 2604 2605 if not 
self._match_set(self.TYPE_TOKENS): 2606 return None 2607 2608 type_token = self._prev.token_type 2609 2610 if type_token == TokenType.PSEUDO_TYPE: 2611 return self.expression(exp.PseudoType, this=self._prev.text) 2612 2613 nested = type_token in self.NESTED_TYPE_TOKENS 2614 is_struct = type_token == TokenType.STRUCT 2615 expressions = None 2616 maybe_func = False 2617 2618 if self._match(TokenType.L_PAREN): 2619 if is_struct: 2620 expressions = self._parse_csv(self._parse_struct_kwargs) 2621 elif nested: 2622 expressions = self._parse_csv(self._parse_types) 2623 else: 2624 expressions = self._parse_csv(self._parse_conjunction) 2625 2626 if not expressions: 2627 self._retreat(index) 2628 return None 2629 2630 self._match_r_paren() 2631 maybe_func = True 2632 2633 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2634 this = exp.DataType( 2635 this=exp.DataType.Type.ARRAY, 2636 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2637 nested=True, 2638 ) 2639 2640 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2641 this = exp.DataType( 2642 this=exp.DataType.Type.ARRAY, 2643 expressions=[this], 2644 nested=True, 2645 ) 2646 2647 return this 2648 2649 if self._match(TokenType.L_BRACKET): 2650 self._retreat(index) 2651 return None 2652 2653 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2654 if nested and self._match(TokenType.LT): 2655 if is_struct: 2656 expressions = self._parse_csv(self._parse_struct_kwargs) 2657 else: 2658 expressions = self._parse_csv(self._parse_types) 2659 2660 if not self._match(TokenType.GT): 2661 self.raise_error("Expecting >") 2662 2663 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2664 values = self._parse_csv(self._parse_conjunction) 2665 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2666 2667 value: t.Optional[exp.Expression] = None 2668 if type_token in self.TIMESTAMPS: 2669 if self._match(TokenType.WITH_TIME_ZONE) or 
type_token == TokenType.TIMESTAMPTZ: 2670 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2671 elif ( 2672 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2673 ): 2674 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2675 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2676 if type_token == TokenType.TIME: 2677 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2678 else: 2679 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2680 2681 maybe_func = maybe_func and value is None 2682 2683 if value is None: 2684 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2685 elif type_token == TokenType.INTERVAL: 2686 unit = self._parse_var() 2687 2688 if not unit: 2689 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 2690 else: 2691 value = self.expression(exp.Interval, unit=unit) 2692 2693 if maybe_func and check_func: 2694 index2 = self._index 2695 peek = self._parse_string() 2696 2697 if not peek: 2698 self._retreat(index) 2699 return None 2700 2701 self._retreat(index2) 2702 2703 if value: 2704 return value 2705 2706 return exp.DataType( 2707 this=exp.DataType.Type[type_token.value.upper()], 2708 expressions=expressions, 2709 nested=nested, 2710 values=values, 2711 prefix=prefix, 2712 ) 2713 2714 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2715 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2716 return self._parse_types() 2717 2718 this = self._parse_id_var() 2719 self._match(TokenType.COLON) 2720 data_type = self._parse_types() 2721 2722 if not data_type: 2723 return None 2724 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2725 2726 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2727 if not self._match(TokenType.AT_TIME_ZONE): 2728 return this 2729 return 
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2730 2731 def _parse_column(self) -> t.Optional[exp.Expression]: 2732 this = self._parse_field() 2733 if isinstance(this, exp.Identifier): 2734 this = self.expression(exp.Column, this=this) 2735 elif not this: 2736 return self._parse_bracket(this) 2737 this = self._parse_bracket(this) 2738 2739 while self._match_set(self.COLUMN_OPERATORS): 2740 op_token = self._prev.token_type 2741 op = self.COLUMN_OPERATORS.get(op_token) 2742 2743 if op_token == TokenType.DCOLON: 2744 field = self._parse_types() 2745 if not field: 2746 self.raise_error("Expected type") 2747 elif op: 2748 self._advance() 2749 value = self._prev.text 2750 field = ( 2751 exp.Literal.number(value) 2752 if self._prev.token_type == TokenType.NUMBER 2753 else exp.Literal.string(value) 2754 ) 2755 else: 2756 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2757 2758 if isinstance(field, exp.Func): 2759 # bigquery allows function calls like x.y.count(...) 2760 # SAFE.SUBSTR(...) 
2761 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2762 this = self._replace_columns_with_dots(this) 2763 2764 if op: 2765 this = op(self, this, field) 2766 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2767 this = self.expression( 2768 exp.Column, 2769 this=field, 2770 table=this.this, 2771 db=this.args.get("table"), 2772 catalog=this.args.get("db"), 2773 ) 2774 else: 2775 this = self.expression(exp.Dot, this=this, expression=field) 2776 this = self._parse_bracket(this) 2777 2778 return this 2779 2780 def _parse_primary(self) -> t.Optional[exp.Expression]: 2781 if self._match_set(self.PRIMARY_PARSERS): 2782 token_type = self._prev.token_type 2783 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2784 2785 if token_type == TokenType.STRING: 2786 expressions = [primary] 2787 while self._match(TokenType.STRING): 2788 expressions.append(exp.Literal.string(self._prev.text)) 2789 if len(expressions) > 1: 2790 return self.expression(exp.Concat, expressions=expressions) 2791 return primary 2792 2793 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2794 return exp.Literal.number(f"0.{self._prev.text}") 2795 2796 if self._match(TokenType.L_PAREN): 2797 comments = self._prev_comments 2798 query = self._parse_select() 2799 2800 if query: 2801 expressions = [query] 2802 else: 2803 expressions = self._parse_csv( 2804 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2805 ) 2806 2807 this = seq_get(expressions, 0) 2808 self._parse_query_modifiers(this) 2809 2810 if isinstance(this, exp.Subqueryable): 2811 this = self._parse_set_operations( 2812 self._parse_subquery(this=this, parse_alias=False) 2813 ) 2814 elif len(expressions) > 1: 2815 this = self.expression(exp.Tuple, expressions=expressions) 2816 else: 2817 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 2818 2819 self._match_r_paren() 2820 2821 if this and comments: 2822 this.comments = 
comments 2823 2824 return this 2825 2826 return None 2827 2828 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2829 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2830 2831 def _parse_function( 2832 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2833 ) -> t.Optional[exp.Expression]: 2834 if not self._curr: 2835 return None 2836 2837 token_type = self._curr.token_type 2838 2839 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2840 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2841 2842 if not self._next or self._next.token_type != TokenType.L_PAREN: 2843 if token_type in self.NO_PAREN_FUNCTIONS: 2844 self._advance() 2845 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2846 2847 return None 2848 2849 if token_type not in self.FUNC_TOKENS: 2850 return None 2851 2852 this = self._curr.text 2853 upper = this.upper() 2854 self._advance(2) 2855 2856 parser = self.FUNCTION_PARSERS.get(upper) 2857 2858 if parser: 2859 this = parser(self) 2860 else: 2861 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2862 2863 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2864 this = self.expression(subquery_predicate, this=self._parse_select()) 2865 self._match_r_paren() 2866 return this 2867 2868 if functions is None: 2869 functions = self.FUNCTIONS 2870 2871 function = functions.get(upper) 2872 args = self._parse_csv(self._parse_lambda) 2873 2874 if function: 2875 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2876 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function: keep it as an Anonymous node carrying the raw name.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter is an identifier optionally followed by a type/constraints.
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) user-defined function name and its optional
        parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        # Charset introducer, e.g. MySQL's _utf8'abc'; falls back to a bare identifier.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        # National character set string literal, e.g. N'abc'.
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified by a scope
        (e.g. @@GLOBAL.x -> kind="GLOBAL", this=x)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`) or, failing that, a DISTINCT
        clause / select / plain expression with optional trailing
        IGNORE/RESPECT NULLS, ORDER and LIMIT."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all; backtrack.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda; rewind and parse a regular expression instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column-def / constraint list into a Schema node."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            # "(SELECT ..." is a subquery, not a schema; backtrack.
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the optional type and column constraints that follow a column name."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Just a bare name; don't wrap it in a ColumnDef.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # Only with both values do we have enough for an identity constraint;
        # otherwise fall back to a plain AUTO_INCREMENT.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        # COMPRESS accepts either a parenthesized list or a single expression.
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)]."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        # INLINE [LENGTH] <expression>
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # NOT NULL / NOT CASESPECIFIC column constraints.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single, optionally named, column constraint."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()  # the constraint's name

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema-level constraint, either named (CONSTRAINT x ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword, if any."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        # UNIQUE with no column list is a column constraint; with one it's a table constraint.
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(columns)] [options], or return None."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (columns) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action (e.g. CASCADE).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY as either a column constraint or a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed constructs following `this`: subscripts, slices, array
        literals and brace-delimited structs. Recurses to handle chains like x[0][1]."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts to the dialect's array index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Turn "<this> : <expr>" into a Slice; otherwise leave `this` untouched.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call `IF(...)` or the statement form
        `IF ... THEN ... [ELSE ...] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(<part> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregations across dialects."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        # [KEY] <key> [: | VALUE] <value>, as used inside JSON_OBJECT(...).
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) together with its optional standard clauses
        (NULL handling, UNIQUE KEYS, RETURNING, FORMAT JSON, ENCODING)."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE; `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first term was the trim characters.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # WINDOW <name> AS (...) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the FILTER / WITHIN GROUP / IGNORE|RESPECT NULLS / OVER clauses
        that may follow a function call `this`. When `alias` is set, parse a named
        window definition (<name> AS (...)) instead of requiring OVER."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <window-name>: a reference to a named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: ROWS|RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one endpoint of a window frame, e.g. `UNBOUNDED PRECEDING` or `5 FOLLOWING`."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) after `this`; when
        `explicit` is set, only aliases introduced by AS are accepted."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or a keyword token usable as one."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        # A quoted string treated as a (quoted) identifier.
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume the next token unless it's a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        # e.g. @param or ${param}; `wrapped` records whether braces were used.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)  # the parser declined; put the token back
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # SELECT * EXCEPT (cols) / EXCEPT cols
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # SELECT * REPLACE (expr AS col, ...)
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list with `parse_method`, dropping empty results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the preceding item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators from `expressions` over `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        # Run `parse_method` between a required pair of parentheses.
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR tokens, e.g. "ISOLATION LEVEL READ COMMITTED".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT / ROLLBACK [TO [SAVEPOINT] x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def>."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
expression.set("exists", exists_column) 3776 3777 return expression 3778 3779 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3780 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3781 3782 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3783 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3784 return self.expression( 3785 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3786 ) 3787 3788 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3789 this = None 3790 kind = self._prev.token_type 3791 3792 if kind == TokenType.CONSTRAINT: 3793 this = self._parse_id_var() 3794 3795 if self._match_text_seq("CHECK"): 3796 expression = self._parse_wrapped(self._parse_conjunction) 3797 enforced = self._match_text_seq("ENFORCED") 3798 3799 return self.expression( 3800 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3801 ) 3802 3803 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3804 expression = self._parse_foreign_key() 3805 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3806 expression = self._parse_primary_key() 3807 3808 return self.expression(exp.AddConstraint, this=this, expression=expression) 3809 3810 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3811 index = self._index - 1 3812 3813 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3814 return self._parse_csv(self._parse_add_constraint) 3815 3816 self._retreat(index) 3817 return self._parse_csv(self._parse_add_column) 3818 3819 def _parse_alter_table_alter(self) -> exp.Expression: 3820 self._match(TokenType.COLUMN) 3821 column = self._parse_field(any_token=True) 3822 3823 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3824 return self.expression(exp.AlterColumn, this=column, drop=True) 3825 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3826 return 
self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 3827 3828 self._match_text_seq("SET", "DATA") 3829 return self.expression( 3830 exp.AlterColumn, 3831 this=column, 3832 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3833 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3834 using=self._match(TokenType.USING) and self._parse_conjunction(), 3835 ) 3836 3837 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 3838 index = self._index - 1 3839 3840 partition_exists = self._parse_exists() 3841 if self._match(TokenType.PARTITION, advance=False): 3842 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3843 3844 self._retreat(index) 3845 return self._parse_csv(self._parse_drop_column) 3846 3847 def _parse_alter_table_rename(self) -> exp.Expression: 3848 self._match_text_seq("TO") 3849 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3850 3851 def _parse_alter(self) -> t.Optional[exp.Expression]: 3852 start = self._prev 3853 3854 if not self._match(TokenType.TABLE): 3855 return self._parse_as_command(start) 3856 3857 exists = self._parse_exists() 3858 this = self._parse_table(schema=True) 3859 3860 if self._next: 3861 self._advance() 3862 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 3863 3864 if parser: 3865 return self.expression( 3866 exp.AlterTable, 3867 this=this, 3868 exists=exists, 3869 actions=ensure_list(parser(self)), 3870 ) 3871 return self._parse_as_command(start) 3872 3873 def _parse_merge(self) -> exp.Expression: 3874 self._match(TokenType.INTO) 3875 target = self._parse_table() 3876 3877 self._match(TokenType.USING) 3878 using = self._parse_table() 3879 3880 self._match(TokenType.ON) 3881 on = self._parse_conjunction() 3882 3883 whens = [] 3884 while self._match(TokenType.WHEN): 3885 matched = not self._match(TokenType.NOT) 3886 self._match_text_seq("MATCHED") 3887 source = ( 3888 False 
3889 if self._match_text_seq("BY", "TARGET") 3890 else self._match_text_seq("BY", "SOURCE") 3891 ) 3892 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 3893 3894 self._match(TokenType.THEN) 3895 3896 if self._match(TokenType.INSERT): 3897 _this = self._parse_star() 3898 if _this: 3899 then = self.expression(exp.Insert, this=_this) 3900 else: 3901 then = self.expression( 3902 exp.Insert, 3903 this=self._parse_value(), 3904 expression=self._match(TokenType.VALUES) and self._parse_value(), 3905 ) 3906 elif self._match(TokenType.UPDATE): 3907 expressions = self._parse_star() 3908 if expressions: 3909 then = self.expression(exp.Update, expressions=expressions) 3910 else: 3911 then = self.expression( 3912 exp.Update, 3913 expressions=self._match(TokenType.SET) 3914 and self._parse_csv(self._parse_equality), 3915 ) 3916 elif self._match(TokenType.DELETE): 3917 then = self.expression(exp.Var, this=self._prev.text) 3918 else: 3919 then = None 3920 3921 whens.append( 3922 self.expression( 3923 exp.When, 3924 matched=matched, 3925 source=source, 3926 condition=condition, 3927 then=then, 3928 ) 3929 ) 3930 3931 return self.expression( 3932 exp.Merge, 3933 this=target, 3934 using=using, 3935 on=on, 3936 expressions=whens, 3937 ) 3938 3939 def _parse_show(self) -> t.Optional[exp.Expression]: 3940 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3941 if parser: 3942 return parser(self) 3943 self._advance() 3944 return self.expression(exp.Show, this=self._prev.text.upper()) 3945 3946 def _parse_set_item_assignment( 3947 self, kind: t.Optional[str] = None 3948 ) -> t.Optional[exp.Expression]: 3949 index = self._index 3950 3951 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 3952 return self._parse_set_transaction(global_=kind == "GLOBAL") 3953 3954 left = self._parse_primary() or self._parse_id_var() 3955 3956 if not self._match_texts(("=", "TO")): 3957 self._retreat(index) 3958 return None 3959 
3960 right = self._parse_statement() or self._parse_id_var() 3961 this = self.expression( 3962 exp.EQ, 3963 this=left, 3964 expression=right, 3965 ) 3966 3967 return self.expression( 3968 exp.SetItem, 3969 this=this, 3970 kind=kind, 3971 ) 3972 3973 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 3974 self._match_text_seq("TRANSACTION") 3975 characteristics = self._parse_csv( 3976 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 3977 ) 3978 return self.expression( 3979 exp.SetItem, 3980 expressions=characteristics, 3981 kind="TRANSACTION", 3982 **{"global": global_}, # type: ignore 3983 ) 3984 3985 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3986 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3987 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 3988 3989 def _parse_set(self) -> exp.Expression: 3990 index = self._index 3991 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3992 3993 if self._curr: 3994 self._retreat(index) 3995 return self._parse_as_command(self._prev) 3996 3997 return set_ 3998 3999 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4000 for option in options: 4001 if self._match_text_seq(*option.split(" ")): 4002 return exp.Var(this=option) 4003 return None 4004 4005 def _parse_as_command(self, start: Token) -> exp.Command: 4006 while self._curr: 4007 self._advance() 4008 text = self._find_sql(start, self._prev) 4009 size = len(start.text) 4010 return exp.Command(this=text[:size], expression=text[size:]) 4011 4012 def _find_parser( 4013 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4014 ) -> t.Optional[t.Callable]: 4015 if not self._curr: 4016 return None 4017 4018 index = self._index 4019 this = [] 4020 while True: 4021 # The current token might be multiple words 4022 curr = self._curr.text.upper() 4023 key = curr.split(" ") 4024 this.append(curr) 
4025 self._advance() 4026 result, trie = in_trie(trie, key) 4027 if result == 0: 4028 break 4029 if result == 2: 4030 subparser = parsers[" ".join(this)] 4031 return subparser 4032 self._retreat(index) 4033 return None 4034 4035 def _match(self, token_type, advance=True): 4036 if not self._curr: 4037 return None 4038 4039 if self._curr.token_type == token_type: 4040 if advance: 4041 self._advance() 4042 return True 4043 4044 return None 4045 4046 def _match_set(self, types, advance=True): 4047 if not self._curr: 4048 return None 4049 4050 if self._curr.token_type in types: 4051 if advance: 4052 self._advance() 4053 return True 4054 4055 return None 4056 4057 def _match_pair(self, token_type_a, token_type_b, advance=True): 4058 if not self._curr or not self._next: 4059 return None 4060 4061 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4062 if advance: 4063 self._advance(2) 4064 return True 4065 4066 return None 4067 4068 def _match_l_paren(self, expression=None): 4069 if not self._match(TokenType.L_PAREN): 4070 self.raise_error("Expecting (") 4071 if expression and self._prev_comments: 4072 expression.comments = self._prev_comments 4073 4074 def _match_r_paren(self, expression=None): 4075 if not self._match(TokenType.R_PAREN): 4076 self.raise_error("Expecting )") 4077 if expression and self._prev_comments: 4078 expression.comments = self._prev_comments 4079 4080 def _match_texts(self, texts, advance=True): 4081 if self._curr and self._curr.text.upper() in texts: 4082 if advance: 4083 self._advance() 4084 return True 4085 return False 4086 4087 def _match_text_seq(self, *texts, advance=True): 4088 index = self._index 4089 for text in texts: 4090 if self._curr and self._curr.text.upper() == text: 4091 self._advance() 4092 else: 4093 self._retreat(index) 4094 return False 4095 4096 if not advance: 4097 self._retreat(index) 4098 4099 return True 4100 4101 def _replace_columns_with_dots(self, this): 4102 if isinstance(this, 
exp.Dot): 4103 exp.replace_children(this, self._replace_columns_with_dots) 4104 elif isinstance(this, exp.Column): 4105 exp.replace_children(this, self._replace_columns_with_dots) 4106 table = this.args.get("table") 4107 this = ( 4108 self.expression(exp.Dot, this=table, expression=this.this) 4109 if table 4110 else self.expression(exp.Var, this=this.name) 4111 ) 4112 elif isinstance(this, exp.Identifier): 4113 this = self.expression(exp.Var, this=this.name) 4114 return this 4115 4116 def _replace_lambda(self, node, lambda_variables): 4117 for column in node.find_all(exp.Column): 4118 if column.parts[0].name in lambda_variables: 4119 dot_or_id = column.to_dot() if column.table else column.this 4120 parent = column.parent 4121 4122 while isinstance(parent, exp.Dot): 4123 if not isinstance(parent.parent, exp.Dot): 4124 parent.replace(dot_or_id) 4125 break 4126 parent = parent.parent 4127 else: 4128 column.replace(dot_or_id) 4129 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
757 def __init__( 758 self, 759 error_level: t.Optional[ErrorLevel] = None, 760 error_message_context: int = 100, 761 index_offset: int = 0, 762 unnest_column_only: bool = False, 763 alias_post_tablesample: bool = False, 764 max_errors: int = 3, 765 null_ordering: t.Optional[str] = None, 766 ): 767 self.error_level = error_level or ErrorLevel.IMMEDIATE 768 self.error_message_context = error_message_context 769 self.index_offset = index_offset 770 self.unnest_column_only = unnest_column_only 771 self.alias_post_tablesample = alias_post_tablesample 772 self.max_errors = max_errors 773 self.null_ordering = null_ordering 774 self.reset()
786 def parse( 787 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 788 ) -> t.List[t.Optional[exp.Expression]]: 789 """ 790 Parses a list of tokens and returns a list of syntax trees, one tree 791 per parsed SQL statement. 792 793 Args: 794 raw_tokens: the list of tokens. 795 sql: the original SQL string, used to produce helpful debug messages. 796 797 Returns: 798 The list of syntax trees. 799 """ 800 return self._parse( 801 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 802 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
804 def parse_into( 805 self, 806 expression_types: exp.IntoType, 807 raw_tokens: t.List[Token], 808 sql: t.Optional[str] = None, 809 ) -> t.List[t.Optional[exp.Expression]]: 810 """ 811 Parses a list of tokens into a given Expression type. If a collection of Expression 812 types is given instead, this method will try to parse the token list into each one 813 of them, stopping at the first for which the parsing succeeds. 814 815 Args: 816 expression_types: the expression type(s) to try and parse the token list into. 817 raw_tokens: the list of tokens. 818 sql: the original SQL string, used to produce helpful debug messages. 819 820 Returns: 821 The target Expression. 822 """ 823 errors = [] 824 for expression_type in ensure_collection(expression_types): 825 parser = self.EXPRESSION_PARSERS.get(expression_type) 826 if not parser: 827 raise TypeError(f"No parser registered for {expression_type}") 828 try: 829 return self._parse(parser, raw_tokens, sql) 830 except ParseError as e: 831 e.errors[0]["into_expression"] = expression_type 832 errors.append(e) 833 raise ParseError( 834 f"Failed to parse into {expression_types}", 835 errors=merge_errors(errors), 836 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
872 def check_errors(self) -> None: 873 """ 874 Logs or raises any found errors, depending on the chosen error level setting. 875 """ 876 if self.error_level == ErrorLevel.WARN: 877 for error in self.errors: 878 logger.error(str(error)) 879 elif self.error_level == ErrorLevel.RAISE and self.errors: 880 raise ParseError( 881 concat_messages(self.errors, self.max_errors), 882 errors=merge_errors(self.errors), 883 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the current/previous token (or an empty one) so there is
        # always a position to report.
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending span on ANSI terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
913 def expression( 914 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 915 ) -> exp.Expression: 916 """ 917 Creates a new, validated Expression. 918 919 Args: 920 exp_class: the expression class to instantiate. 921 comments: an optional list of comments to attach to the expression. 922 kwargs: the arguments to set for the expression along with their respective values. 923 924 Returns: 925 The target expression. 926 """ 927 instance = exp_class(**kwargs) 928 if self._prev_comments: 929 instance.comments = self._prev_comments 930 self._prev_comments = None 931 if comments: 932 instance.comments = comments 933 self.validate_expression(instance) 934 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
936 def validate_expression( 937 self, expression: exp.Expression, args: t.Optional[t.List] = None 938 ) -> None: 939 """ 940 Validates an already instantiated expression, making sure that all its mandatory arguments 941 are set. 942 943 Args: 944 expression: the expression to validate. 945 args: an optional list of items that was used to instantiate the expression, if it's a Func. 946 """ 947 if self.error_level == ErrorLevel.IGNORE: 948 return 949 950 for error_message in expression.error_messages(args): 951 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.