# Module: sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21 22def parse_var_map(args): 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def binary_range_parser( 35 expr_type: t.Type[exp.Expression], 36) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 37 return lambda self, this: self._parse_escape( 38 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 39 ) 40 41 42class _Parser(type): 43 def __new__(cls, clsname, bases, attrs): 44 klass = super().__new__(cls, clsname, bases, attrs) 45 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 46 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 47 48 return klass 49 50 51class Parser(metaclass=_Parser): 52 """ 53 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 54 a parsed syntax tree. 55 56 Args: 57 error_level: the desired error level. 58 Default: ErrorLevel.RAISE 59 error_message_context: determines the amount of context to capture from a 60 query string when displaying the error message (in number of characters). 61 Default: 50. 62 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 63 Default: 0 64 alias_post_tablesample: If the table alias comes after tablesample. 
65 Default: False 66 max_errors: Maximum number of error messages to include in a raised ParseError. 67 This is only relevant if error_level is ErrorLevel.RAISE. 68 Default: 3 69 null_ordering: Indicates the default null ordering method to use if not explicitly set. 70 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 71 Default: "nulls_are_small" 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "TIME_TO_TIME_STR": lambda args: exp.Cast( 81 this=seq_get(args, 0), 82 to=exp.DataType(this=exp.DataType.Type.TEXT), 83 ), 84 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 85 this=exp.Cast( 86 this=seq_get(args, 0), 87 to=exp.DataType(this=exp.DataType.Type.TEXT), 88 ), 89 start=exp.Literal.number(1), 90 length=exp.Literal.number(10), 91 ), 92 "VAR_MAP": parse_var_map, 93 "IFNULL": exp.Coalesce.from_arg_list, 94 } 95 96 NO_PAREN_FUNCTIONS = { 97 TokenType.CURRENT_DATE: exp.CurrentDate, 98 TokenType.CURRENT_DATETIME: exp.CurrentDate, 99 TokenType.CURRENT_TIME: exp.CurrentTime, 100 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 101 } 102 103 NESTED_TYPE_TOKENS = { 104 TokenType.ARRAY, 105 TokenType.MAP, 106 TokenType.STRUCT, 107 TokenType.NULLABLE, 108 } 109 110 TYPE_TOKENS = { 111 TokenType.BIT, 112 TokenType.BOOLEAN, 113 TokenType.TINYINT, 114 TokenType.SMALLINT, 115 TokenType.INT, 116 TokenType.BIGINT, 117 TokenType.FLOAT, 118 TokenType.DOUBLE, 119 TokenType.CHAR, 120 TokenType.NCHAR, 121 TokenType.VARCHAR, 122 TokenType.NVARCHAR, 123 TokenType.TEXT, 124 TokenType.MEDIUMTEXT, 125 TokenType.LONGTEXT, 126 TokenType.MEDIUMBLOB, 127 TokenType.LONGBLOB, 128 TokenType.BINARY, 129 TokenType.VARBINARY, 130 TokenType.JSON, 131 TokenType.JSONB, 132 TokenType.INTERVAL, 133 TokenType.TIME, 134 TokenType.TIMESTAMP, 135 TokenType.TIMESTAMPTZ, 136 
TokenType.TIMESTAMPLTZ, 137 TokenType.DATETIME, 138 TokenType.DATE, 139 TokenType.DECIMAL, 140 TokenType.UUID, 141 TokenType.GEOGRAPHY, 142 TokenType.GEOMETRY, 143 TokenType.HLLSKETCH, 144 TokenType.HSTORE, 145 TokenType.PSEUDO_TYPE, 146 TokenType.SUPER, 147 TokenType.SERIAL, 148 TokenType.SMALLSERIAL, 149 TokenType.BIGSERIAL, 150 TokenType.XML, 151 TokenType.UNIQUEIDENTIFIER, 152 TokenType.MONEY, 153 TokenType.SMALLMONEY, 154 TokenType.ROWVERSION, 155 TokenType.IMAGE, 156 TokenType.VARIANT, 157 TokenType.OBJECT, 158 TokenType.INET, 159 *NESTED_TYPE_TOKENS, 160 } 161 162 SUBQUERY_PREDICATES = { 163 TokenType.ANY: exp.Any, 164 TokenType.ALL: exp.All, 165 TokenType.EXISTS: exp.Exists, 166 TokenType.SOME: exp.Any, 167 } 168 169 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 170 171 DB_CREATABLES = { 172 TokenType.DATABASE, 173 TokenType.SCHEMA, 174 TokenType.TABLE, 175 TokenType.VIEW, 176 } 177 178 CREATABLES = { 179 TokenType.COLUMN, 180 TokenType.FUNCTION, 181 TokenType.INDEX, 182 TokenType.PROCEDURE, 183 *DB_CREATABLES, 184 } 185 186 ID_VAR_TOKENS = { 187 TokenType.VAR, 188 TokenType.ANTI, 189 TokenType.APPLY, 190 TokenType.AUTO_INCREMENT, 191 TokenType.BEGIN, 192 TokenType.BOTH, 193 TokenType.BUCKET, 194 TokenType.CACHE, 195 TokenType.CASCADE, 196 TokenType.COLLATE, 197 TokenType.COMMAND, 198 TokenType.COMMENT, 199 TokenType.COMMIT, 200 TokenType.COMPOUND, 201 TokenType.CONSTRAINT, 202 TokenType.DEFAULT, 203 TokenType.DELETE, 204 TokenType.DESCRIBE, 205 TokenType.DIV, 206 TokenType.END, 207 TokenType.EXECUTE, 208 TokenType.ESCAPE, 209 TokenType.FALSE, 210 TokenType.FIRST, 211 TokenType.FILTER, 212 TokenType.FOLLOWING, 213 TokenType.FORMAT, 214 TokenType.IF, 215 TokenType.ISNULL, 216 TokenType.INTERVAL, 217 TokenType.LAZY, 218 TokenType.LEADING, 219 TokenType.LEFT, 220 TokenType.LOCAL, 221 TokenType.MATERIALIZED, 222 TokenType.MERGE, 223 TokenType.NATURAL, 224 TokenType.NEXT, 225 TokenType.OFFSET, 226 TokenType.ONLY, 227 
TokenType.OPTIONS, 228 TokenType.ORDINALITY, 229 TokenType.PERCENT, 230 TokenType.PIVOT, 231 TokenType.PRECEDING, 232 TokenType.RANGE, 233 TokenType.REFERENCES, 234 TokenType.RIGHT, 235 TokenType.ROW, 236 TokenType.ROWS, 237 TokenType.SEED, 238 TokenType.SEMI, 239 TokenType.SET, 240 TokenType.SHOW, 241 TokenType.SORTKEY, 242 TokenType.TEMPORARY, 243 TokenType.TOP, 244 TokenType.TRAILING, 245 TokenType.TRUE, 246 TokenType.UNBOUNDED, 247 TokenType.UNIQUE, 248 TokenType.UNLOGGED, 249 TokenType.UNPIVOT, 250 TokenType.VOLATILE, 251 TokenType.WINDOW, 252 *CREATABLES, 253 *SUBQUERY_PREDICATES, 254 *TYPE_TOKENS, 255 *NO_PAREN_FUNCTIONS, 256 } 257 258 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 259 TokenType.APPLY, 260 TokenType.LEFT, 261 TokenType.NATURAL, 262 TokenType.OFFSET, 263 TokenType.RIGHT, 264 TokenType.WINDOW, 265 } 266 267 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 268 269 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 270 271 FUNC_TOKENS = { 272 TokenType.COMMAND, 273 TokenType.CURRENT_DATE, 274 TokenType.CURRENT_DATETIME, 275 TokenType.CURRENT_TIMESTAMP, 276 TokenType.CURRENT_TIME, 277 TokenType.FILTER, 278 TokenType.FIRST, 279 TokenType.FORMAT, 280 TokenType.IDENTIFIER, 281 TokenType.INDEX, 282 TokenType.ISNULL, 283 TokenType.ILIKE, 284 TokenType.LIKE, 285 TokenType.MERGE, 286 TokenType.OFFSET, 287 TokenType.PRIMARY_KEY, 288 TokenType.REPLACE, 289 TokenType.ROW, 290 TokenType.UNNEST, 291 TokenType.VAR, 292 TokenType.LEFT, 293 TokenType.RIGHT, 294 TokenType.DATE, 295 TokenType.DATETIME, 296 TokenType.TABLE, 297 TokenType.TIMESTAMP, 298 TokenType.TIMESTAMPTZ, 299 TokenType.WINDOW, 300 *TYPE_TOKENS, 301 *SUBQUERY_PREDICATES, 302 } 303 304 CONJUNCTION = { 305 TokenType.AND: exp.And, 306 TokenType.OR: exp.Or, 307 } 308 309 EQUALITY = { 310 TokenType.EQ: exp.EQ, 311 TokenType.NEQ: exp.NEQ, 312 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 313 } 314 315 COMPARISON = { 316 TokenType.GT: exp.GT, 317 TokenType.GTE: exp.GTE, 318 TokenType.LT: 
exp.LT, 319 TokenType.LTE: exp.LTE, 320 } 321 322 BITWISE = { 323 TokenType.AMP: exp.BitwiseAnd, 324 TokenType.CARET: exp.BitwiseXor, 325 TokenType.PIPE: exp.BitwiseOr, 326 TokenType.DPIPE: exp.DPipe, 327 } 328 329 TERM = { 330 TokenType.DASH: exp.Sub, 331 TokenType.PLUS: exp.Add, 332 TokenType.MOD: exp.Mod, 333 TokenType.COLLATE: exp.Collate, 334 } 335 336 FACTOR = { 337 TokenType.DIV: exp.IntDiv, 338 TokenType.LR_ARROW: exp.Distance, 339 TokenType.SLASH: exp.Div, 340 TokenType.STAR: exp.Mul, 341 } 342 343 TIMESTAMPS = { 344 TokenType.TIME, 345 TokenType.TIMESTAMP, 346 TokenType.TIMESTAMPTZ, 347 TokenType.TIMESTAMPLTZ, 348 } 349 350 SET_OPERATIONS = { 351 TokenType.UNION, 352 TokenType.INTERSECT, 353 TokenType.EXCEPT, 354 } 355 356 JOIN_SIDES = { 357 TokenType.LEFT, 358 TokenType.RIGHT, 359 TokenType.FULL, 360 } 361 362 JOIN_KINDS = { 363 TokenType.INNER, 364 TokenType.OUTER, 365 TokenType.CROSS, 366 TokenType.SEMI, 367 TokenType.ANTI, 368 } 369 370 LAMBDAS = { 371 TokenType.ARROW: lambda self, expressions: self.expression( 372 exp.Lambda, 373 this=self._parse_conjunction().transform( 374 self._replace_lambda, {node.name for node in expressions} 375 ), 376 expressions=expressions, 377 ), 378 TokenType.FARROW: lambda self, expressions: self.expression( 379 exp.Kwarg, 380 this=exp.Var(this=expressions[0].name), 381 expression=self._parse_conjunction(), 382 ), 383 } 384 385 COLUMN_OPERATORS = { 386 TokenType.DOT: None, 387 TokenType.DCOLON: lambda self, this, to: self.expression( 388 exp.Cast, 389 this=this, 390 to=to, 391 ), 392 TokenType.ARROW: lambda self, this, path: self.expression( 393 exp.JSONExtract, 394 this=this, 395 expression=path, 396 ), 397 TokenType.DARROW: lambda self, this, path: self.expression( 398 exp.JSONExtractScalar, 399 this=this, 400 expression=path, 401 ), 402 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 403 exp.JSONBExtract, 404 this=this, 405 expression=path, 406 ), 407 TokenType.DHASH_ARROW: lambda self, this, path: 
self.expression( 408 exp.JSONBExtractScalar, 409 this=this, 410 expression=path, 411 ), 412 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 413 exp.JSONBContains, 414 this=this, 415 expression=key, 416 ), 417 } 418 419 EXPRESSION_PARSERS = { 420 exp.Column: lambda self: self._parse_column(), 421 exp.DataType: lambda self: self._parse_types(), 422 exp.From: lambda self: self._parse_from(), 423 exp.Group: lambda self: self._parse_group(), 424 exp.Identifier: lambda self: self._parse_id_var(), 425 exp.Lateral: lambda self: self._parse_lateral(), 426 exp.Join: lambda self: self._parse_join(), 427 exp.Order: lambda self: self._parse_order(), 428 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 429 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 430 exp.Lambda: lambda self: self._parse_lambda(), 431 exp.Limit: lambda self: self._parse_limit(), 432 exp.Offset: lambda self: self._parse_offset(), 433 exp.TableAlias: lambda self: self._parse_table_alias(), 434 exp.Table: lambda self: self._parse_table(), 435 exp.Condition: lambda self: self._parse_conjunction(), 436 exp.Expression: lambda self: self._parse_statement(), 437 exp.Properties: lambda self: self._parse_properties(), 438 exp.Where: lambda self: self._parse_where(), 439 exp.Ordered: lambda self: self._parse_ordered(), 440 exp.Having: lambda self: self._parse_having(), 441 exp.With: lambda self: self._parse_with(), 442 exp.Window: lambda self: self._parse_named_window(), 443 exp.Qualify: lambda self: self._parse_qualify(), 444 exp.Returning: lambda self: self._parse_returning(), 445 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 446 } 447 448 STATEMENT_PARSERS = { 449 TokenType.ALTER: lambda self: self._parse_alter(), 450 TokenType.BEGIN: lambda self: self._parse_transaction(), 451 TokenType.CACHE: lambda self: self._parse_cache(), 452 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 453 TokenType.COMMENT: lambda self: 
self._parse_comment(), 454 TokenType.CREATE: lambda self: self._parse_create(), 455 TokenType.DELETE: lambda self: self._parse_delete(), 456 TokenType.DESC: lambda self: self._parse_describe(), 457 TokenType.DESCRIBE: lambda self: self._parse_describe(), 458 TokenType.DROP: lambda self: self._parse_drop(), 459 TokenType.END: lambda self: self._parse_commit_or_rollback(), 460 TokenType.INSERT: lambda self: self._parse_insert(), 461 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 462 TokenType.MERGE: lambda self: self._parse_merge(), 463 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 464 TokenType.SET: lambda self: self._parse_set(), 465 TokenType.UNCACHE: lambda self: self._parse_uncache(), 466 TokenType.UPDATE: lambda self: self._parse_update(), 467 TokenType.USE: lambda self: self.expression( 468 exp.Use, 469 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 470 and exp.Var(this=self._prev.text), 471 this=self._parse_table(schema=False), 472 ), 473 } 474 475 UNARY_PARSERS = { 476 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 477 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 478 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 479 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 480 } 481 482 PRIMARY_PARSERS = { 483 TokenType.STRING: lambda self, token: self.expression( 484 exp.Literal, this=token.text, is_string=True 485 ), 486 TokenType.NUMBER: lambda self, token: self.expression( 487 exp.Literal, this=token.text, is_string=False 488 ), 489 TokenType.STAR: lambda self, _: self.expression( 490 exp.Star, 491 **{"except": self._parse_except(), "replace": self._parse_replace()}, 492 ), 493 TokenType.NULL: lambda self, _: self.expression(exp.Null), 494 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 495 TokenType.FALSE: lambda self, _: 
self.expression(exp.Boolean, this=False), 496 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 497 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 498 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 499 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 500 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 501 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 502 } 503 504 PLACEHOLDER_PARSERS = { 505 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 506 TokenType.PARAMETER: lambda self: self._parse_parameter(), 507 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 508 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 509 else None, 510 } 511 512 RANGE_PARSERS = { 513 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 514 TokenType.GLOB: binary_range_parser(exp.Glob), 515 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 516 TokenType.IN: lambda self, this: self._parse_in(this), 517 TokenType.IS: lambda self, this: self._parse_is(this), 518 TokenType.LIKE: binary_range_parser(exp.Like), 519 TokenType.ILIKE: binary_range_parser(exp.ILike), 520 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 521 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 522 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 523 } 524 525 PROPERTY_PARSERS = { 526 "AFTER": lambda self: self._parse_afterjournal( 527 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 528 ), 529 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 530 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 531 "BEFORE": lambda self: self._parse_journal( 532 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 533 ), 
534 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 535 "CHARACTER SET": lambda self: self._parse_character_set(), 536 "CHECKSUM": lambda self: self._parse_checksum(), 537 "CLUSTER BY": lambda self: self.expression( 538 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 539 ), 540 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 541 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 542 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 543 default=self._prev.text.upper() == "DEFAULT" 544 ), 545 "DEFINER": lambda self: self._parse_definer(), 546 "DETERMINISTIC": lambda self: self.expression( 547 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 548 ), 549 "DISTKEY": lambda self: self._parse_distkey(), 550 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 551 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 552 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 553 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 554 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 555 "FREESPACE": lambda self: self._parse_freespace(), 556 "GLOBAL": lambda self: self._parse_temporary(global_=True), 557 "IMMUTABLE": lambda self: self.expression( 558 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 559 ), 560 "JOURNAL": lambda self: self._parse_journal( 561 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 562 ), 563 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 564 "LIKE": lambda self: self._parse_create_like(), 565 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 566 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 567 "LOCK": lambda self: self._parse_locking(), 568 "LOCKING": lambda self: 
self._parse_locking(), 569 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 570 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 571 "MAX": lambda self: self._parse_datablocksize(), 572 "MAXIMUM": lambda self: self._parse_datablocksize(), 573 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 574 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 575 ), 576 "MIN": lambda self: self._parse_datablocksize(), 577 "MINIMUM": lambda self: self._parse_datablocksize(), 578 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 579 "NO": lambda self: self._parse_noprimaryindex(), 580 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 581 "ON": lambda self: self._parse_oncommit(), 582 "PARTITION BY": lambda self: self._parse_partitioned_by(), 583 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 584 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 585 "RETURNS": lambda self: self._parse_returns(), 586 "ROW": lambda self: self._parse_row(), 587 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 588 "SORTKEY": lambda self: self._parse_sortkey(), 589 "STABLE": lambda self: self.expression( 590 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 591 ), 592 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 593 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 594 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 595 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 596 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 597 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 598 "VOLATILE": lambda self: self.expression( 599 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 600 ), 601 "WITH": lambda self: self._parse_with_property(), 602 } 603 604 
CONSTRAINT_PARSERS = { 605 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 606 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 607 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 608 "CHARACTER SET": lambda self: self.expression( 609 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 610 ), 611 "CHECK": lambda self: self.expression( 612 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 613 ), 614 "COLLATE": lambda self: self.expression( 615 exp.CollateColumnConstraint, this=self._parse_var() 616 ), 617 "COMMENT": lambda self: self.expression( 618 exp.CommentColumnConstraint, this=self._parse_string() 619 ), 620 "COMPRESS": lambda self: self._parse_compress(), 621 "DEFAULT": lambda self: self.expression( 622 exp.DefaultColumnConstraint, this=self._parse_bitwise() 623 ), 624 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 625 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 626 "FORMAT": lambda self: self.expression( 627 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 628 ), 629 "GENERATED": lambda self: self._parse_generated_as_identity(), 630 "IDENTITY": lambda self: self._parse_auto_increment(), 631 "INLINE": lambda self: self._parse_inline(), 632 "LIKE": lambda self: self._parse_create_like(), 633 "NOT": lambda self: self._parse_not_constraint(), 634 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 635 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 636 "PRIMARY KEY": lambda self: self._parse_primary_key(), 637 "TITLE": lambda self: self.expression( 638 exp.TitleColumnConstraint, this=self._parse_var_or_string() 639 ), 640 "UNIQUE": lambda self: self._parse_unique(), 641 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 642 } 643 644 ALTER_PARSERS = { 645 "ADD": lambda self: 
self._parse_alter_table_add(), 646 "ALTER": lambda self: self._parse_alter_table_alter(), 647 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 648 "DROP": lambda self: self._parse_alter_table_drop(), 649 "RENAME": lambda self: self._parse_alter_table_rename(), 650 } 651 652 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 653 654 NO_PAREN_FUNCTION_PARSERS = { 655 TokenType.CASE: lambda self: self._parse_case(), 656 TokenType.IF: lambda self: self._parse_if(), 657 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 658 } 659 660 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 661 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 662 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 663 "EXTRACT": lambda self: self._parse_extract(), 664 "POSITION": lambda self: self._parse_position(), 665 "STRING_AGG": lambda self: self._parse_string_agg(), 666 "SUBSTRING": lambda self: self._parse_substring(), 667 "TRIM": lambda self: self._parse_trim(), 668 "TRY_CAST": lambda self: self._parse_cast(False), 669 "TRY_CONVERT": lambda self: self._parse_convert(False), 670 } 671 672 QUERY_MODIFIER_PARSERS = { 673 "match": lambda self: self._parse_match_recognize(), 674 "where": lambda self: self._parse_where(), 675 "group": lambda self: self._parse_group(), 676 "having": lambda self: self._parse_having(), 677 "qualify": lambda self: self._parse_qualify(), 678 "windows": lambda self: self._parse_window_clause(), 679 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 680 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 681 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 682 "order": lambda self: self._parse_order(), 683 "limit": lambda self: self._parse_limit(), 684 "offset": lambda self: self._parse_offset(), 685 "lock": lambda self: self._parse_lock(), 686 "sample": lambda self: 
self._parse_table_sample(as_modifier=True), 687 } 688 689 SET_PARSERS = { 690 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 691 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 692 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 693 "TRANSACTION": lambda self: self._parse_set_transaction(), 694 } 695 696 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 697 698 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 699 700 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 701 702 TRANSACTION_CHARACTERISTICS = { 703 "ISOLATION LEVEL REPEATABLE READ", 704 "ISOLATION LEVEL READ COMMITTED", 705 "ISOLATION LEVEL READ UNCOMMITTED", 706 "ISOLATION LEVEL SERIALIZABLE", 707 "READ WRITE", 708 "READ ONLY", 709 } 710 711 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 712 713 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 714 715 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 716 717 STRICT_CAST = True 718 719 CONVERT_TYPE_FIRST = False 720 721 __slots__ = ( 722 "error_level", 723 "error_message_context", 724 "sql", 725 "errors", 726 "index_offset", 727 "unnest_column_only", 728 "alias_post_tablesample", 729 "max_errors", 730 "null_ordering", 731 "_tokens", 732 "_index", 733 "_curr", 734 "_next", 735 "_prev", 736 "_prev_comments", 737 "_show_trie", 738 "_set_trie", 739 ) 740 741 def __init__( 742 self, 743 error_level: t.Optional[ErrorLevel] = None, 744 error_message_context: int = 100, 745 index_offset: int = 0, 746 unnest_column_only: bool = False, 747 alias_post_tablesample: bool = False, 748 max_errors: int = 3, 749 null_ordering: t.Optional[str] = None, 750 ): 751 self.error_level = error_level or ErrorLevel.IMMEDIATE 752 self.error_message_context = error_message_context 753 self.index_offset = index_offset 754 self.unnest_column_only = unnest_column_only 755 self.alias_post_tablesample = alias_post_tablesample 756 self.max_errors = 
max_errors 757 self.null_ordering = null_ordering 758 self.reset() 759 760 def reset(self): 761 self.sql = "" 762 self.errors = [] 763 self._tokens = [] 764 self._index = 0 765 self._curr = None 766 self._next = None 767 self._prev = None 768 self._prev_comments = None 769 770 def parse( 771 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 772 ) -> t.List[t.Optional[exp.Expression]]: 773 """ 774 Parses a list of tokens and returns a list of syntax trees, one tree 775 per parsed SQL statement. 776 777 Args: 778 raw_tokens: the list of tokens. 779 sql: the original SQL string, used to produce helpful debug messages. 780 781 Returns: 782 The list of syntax trees. 783 """ 784 return self._parse( 785 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 786 ) 787 788 def parse_into( 789 self, 790 expression_types: exp.IntoType, 791 raw_tokens: t.List[Token], 792 sql: t.Optional[str] = None, 793 ) -> t.List[t.Optional[exp.Expression]]: 794 """ 795 Parses a list of tokens into a given Expression type. If a collection of Expression 796 types is given instead, this method will try to parse the token list into each one 797 of them, stopping at the first for which the parsing succeeds. 798 799 Args: 800 expression_types: the expression type(s) to try and parse the token list into. 801 raw_tokens: the list of tokens. 802 sql: the original SQL string, used to produce helpful debug messages. 803 804 Returns: 805 The target Expression. 
806 """ 807 errors = [] 808 for expression_type in ensure_collection(expression_types): 809 parser = self.EXPRESSION_PARSERS.get(expression_type) 810 if not parser: 811 raise TypeError(f"No parser registered for {expression_type}") 812 try: 813 return self._parse(parser, raw_tokens, sql) 814 except ParseError as e: 815 e.errors[0]["into_expression"] = expression_type 816 errors.append(e) 817 raise ParseError( 818 f"Failed to parse into {expression_types}", 819 errors=merge_errors(errors), 820 ) from errors[-1] 821 822 def _parse( 823 self, 824 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 825 raw_tokens: t.List[Token], 826 sql: t.Optional[str] = None, 827 ) -> t.List[t.Optional[exp.Expression]]: 828 self.reset() 829 self.sql = sql or "" 830 total = len(raw_tokens) 831 chunks: t.List[t.List[Token]] = [[]] 832 833 for i, token in enumerate(raw_tokens): 834 if token.token_type == TokenType.SEMICOLON: 835 if i < total - 1: 836 chunks.append([]) 837 else: 838 chunks[-1].append(token) 839 840 expressions = [] 841 842 for tokens in chunks: 843 self._index = -1 844 self._tokens = tokens 845 self._advance() 846 847 expressions.append(parse_method(self)) 848 849 if self._index < len(self._tokens): 850 self.raise_error("Invalid expression / Unexpected token") 851 852 self.check_errors() 853 854 return expressions 855 856 def check_errors(self) -> None: 857 """ 858 Logs or raises any found errors, depending on the chosen error level setting. 859 """ 860 if self.error_level == ErrorLevel.WARN: 861 for error in self.errors: 862 logger.error(str(error)) 863 elif self.error_level == ErrorLevel.RAISE and self.errors: 864 raise ParseError( 865 concat_messages(self.errors, self.max_errors), 866 errors=merge_errors(self.errors), 867 ) 868 869 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 870 """ 871 Appends an error in the list of recorded errors or raises it, depending on the chosen 872 error level setting. 
873 """ 874 token = token or self._curr or self._prev or Token.string("") 875 start = self._find_token(token) 876 end = start + len(token.text) 877 start_context = self.sql[max(start - self.error_message_context, 0) : start] 878 highlight = self.sql[start:end] 879 end_context = self.sql[end : end + self.error_message_context] 880 881 error = ParseError.new( 882 f"{message}. Line {token.line}, Col: {token.col}.\n" 883 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 884 description=message, 885 line=token.line, 886 col=token.col, 887 start_context=start_context, 888 highlight=highlight, 889 end_context=end_context, 890 ) 891 892 if self.error_level == ErrorLevel.IMMEDIATE: 893 raise error 894 895 self.errors.append(error) 896 897 def expression( 898 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 899 ) -> exp.Expression: 900 """ 901 Creates a new, validated Expression. 902 903 Args: 904 exp_class: the expression class to instantiate. 905 comments: an optional list of comments to attach to the expression. 906 kwargs: the arguments to set for the expression along with their respective values. 907 908 Returns: 909 The target expression. 910 """ 911 instance = exp_class(**kwargs) 912 if self._prev_comments: 913 instance.comments = self._prev_comments 914 self._prev_comments = None 915 if comments: 916 instance.comments = comments 917 self.validate_expression(instance) 918 return instance 919 920 def validate_expression( 921 self, expression: exp.Expression, args: t.Optional[t.List] = None 922 ) -> None: 923 """ 924 Validates an already instantiated expression, making sure that all its mandatory arguments 925 are set. 926 927 Args: 928 expression: the expression to validate. 929 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanning `start` through the end of `end`.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        """Return the absolute character offset of `token` within the raw SQL string."""
        line = 1
        col = 1
        index = 0

        # Walk the SQL character by character, tracking line/col, until the
        # token's recorded (line, col) position is reached.
        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        """Move the token cursor forward `times` tokens, refreshing curr/next/prev state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback for unparsed statements: wrap the command keyword (_prev) and
        # the remainder of the statement as an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <name> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown object kind - fall back to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single top-level statement; None when the token stream is exhausted."""
        if self._curr is None:
            return None

        # Dedicated statement parsers (CREATE, DROP, ...) take precedence.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise treat it as an expression or a SELECT-like query.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """Parse DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parse a CREATE statement (tables, views, functions, indexes, ...)."""
        start = self._prev
        # REPLACE may appear either as the statement keyword or as OR REPLACE.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)
        volatile = self._match(TokenType.VOLATILE)

        # CREATE TABLE FUNCTION: consume TABLE so FUNCTION becomes the creatable kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev
1057 if not create_token: 1058 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1059 create_token = self._match_set(self.CREATABLES) and self._prev 1060 1061 if not properties or not create_token: 1062 return self._parse_as_command(start) 1063 1064 exists = self._parse_exists(not_=True) 1065 this = None 1066 expression = None 1067 indexes = None 1068 no_schema_binding = None 1069 begin = None 1070 1071 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1072 this = self._parse_user_defined_function(kind=create_token.token_type) 1073 temp_properties = self._parse_properties() 1074 if properties and temp_properties: 1075 properties.expressions.extend(temp_properties.expressions) 1076 elif temp_properties: 1077 properties = temp_properties 1078 1079 self._match(TokenType.ALIAS) 1080 begin = self._match(TokenType.BEGIN) 1081 return_ = self._match_text_seq("RETURN") 1082 expression = self._parse_statement() 1083 1084 if return_: 1085 expression = self.expression(exp.Return, this=expression) 1086 elif create_token.token_type == TokenType.INDEX: 1087 this = self._parse_index() 1088 elif create_token.token_type in self.DB_CREATABLES: 1089 table_parts = self._parse_table_parts(schema=True) 1090 1091 # exp.Properties.Location.POST_NAME 1092 if self._match(TokenType.COMMA): 1093 temp_properties = self._parse_properties(before=True) 1094 if properties and temp_properties: 1095 properties.expressions.extend(temp_properties.expressions) 1096 elif temp_properties: 1097 properties = temp_properties 1098 1099 this = self._parse_schema(this=table_parts) 1100 1101 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1102 temp_properties = self._parse_properties() 1103 if properties and temp_properties: 1104 properties.expressions.extend(temp_properties.expressions) 1105 elif temp_properties: 1106 properties = temp_properties 1107 1108 self._match(TokenType.ALIAS) 1109 1110 # exp.Properties.Location.POST_ALIAS 1111 if not ( 1112 
self._match(TokenType.SELECT, advance=False) 1113 or self._match(TokenType.WITH, advance=False) 1114 or self._match(TokenType.L_PAREN, advance=False) 1115 ): 1116 temp_properties = self._parse_properties() 1117 if properties and temp_properties: 1118 properties.expressions.extend(temp_properties.expressions) 1119 elif temp_properties: 1120 properties = temp_properties 1121 1122 expression = self._parse_ddl_select() 1123 1124 if create_token.token_type == TokenType.TABLE: 1125 # exp.Properties.Location.POST_EXPRESSION 1126 temp_properties = self._parse_properties() 1127 if properties and temp_properties: 1128 properties.expressions.extend(temp_properties.expressions) 1129 elif temp_properties: 1130 properties = temp_properties 1131 1132 indexes = [] 1133 while True: 1134 index = self._parse_create_table_index() 1135 1136 # exp.Properties.Location.POST_INDEX 1137 if self._match(TokenType.PARTITION_BY, advance=False): 1138 temp_properties = self._parse_properties() 1139 if properties and temp_properties: 1140 properties.expressions.extend(temp_properties.expressions) 1141 elif temp_properties: 1142 properties = temp_properties 1143 1144 if not index: 1145 break 1146 else: 1147 indexes.append(index) 1148 elif create_token.token_type == TokenType.VIEW: 1149 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1150 no_schema_binding = True 1151 1152 return self.expression( 1153 exp.Create, 1154 this=this, 1155 kind=create_token.text, 1156 replace=replace, 1157 unique=unique, 1158 volatile=volatile, 1159 expression=expression, 1160 exists=exists, 1161 properties=properties, 1162 indexes=indexes, 1163 no_schema_binding=no_schema_binding, 1164 begin=begin, 1165 ) 1166 1167 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1168 self._match(TokenType.COMMA) 1169 1170 # parsers look to _prev for no/dual/default, so need to consume first 1171 self._match_text_seq("NO") 1172 self._match_text_seq("DUAL") 1173 self._match_text_seq("DEFAULT") 1174 1175 if 
self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1176 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1177 1178 return None 1179 1180 def _parse_property(self) -> t.Optional[exp.Expression]: 1181 if self._match_texts(self.PROPERTY_PARSERS): 1182 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1183 1184 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1185 return self._parse_character_set(default=True) 1186 1187 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1188 return self._parse_sortkey(compound=True) 1189 1190 if self._match_text_seq("SQL", "SECURITY"): 1191 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1192 1193 assignment = self._match_pair( 1194 TokenType.VAR, TokenType.EQ, advance=False 1195 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1196 1197 if assignment: 1198 key = self._parse_var_or_string() 1199 self._match(TokenType.EQ) 1200 return self.expression(exp.Property, this=key, value=self._parse_column()) 1201 1202 return None 1203 1204 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1205 self._match(TokenType.EQ) 1206 self._match(TokenType.ALIAS) 1207 return self.expression( 1208 exp_class, 1209 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1210 ) 1211 1212 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1213 properties = [] 1214 1215 while True: 1216 if before: 1217 identified_property = self._parse_property_before() 1218 else: 1219 identified_property = self._parse_property() 1220 1221 if not identified_property: 1222 break 1223 for p in ensure_collection(identified_property): 1224 properties.append(p) 1225 1226 if properties: 1227 return self.expression(exp.Properties, expressions=properties) 1228 1229 return None 1230 1231 def _parse_fallback(self, no=False) -> exp.Expression: 1232 self._match_text_seq("FALLBACK") 1233 return 
self.expression( 1234 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1235 ) 1236 1237 def _parse_with_property( 1238 self, 1239 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1240 self._match(TokenType.WITH) 1241 if self._match(TokenType.L_PAREN, advance=False): 1242 return self._parse_wrapped_csv(self._parse_property) 1243 1244 if self._match_text_seq("JOURNAL"): 1245 return self._parse_withjournaltable() 1246 1247 if self._match_text_seq("DATA"): 1248 return self._parse_withdata(no=False) 1249 elif self._match_text_seq("NO", "DATA"): 1250 return self._parse_withdata(no=True) 1251 1252 if not self._next: 1253 return None 1254 1255 return self._parse_withisolatedloading() 1256 1257 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1258 def _parse_definer(self) -> t.Optional[exp.Expression]: 1259 self._match(TokenType.EQ) 1260 1261 user = self._parse_id_var() 1262 self._match(TokenType.PARAMETER) 1263 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1264 1265 if not user or not host: 1266 return None 1267 1268 return exp.DefinerProperty(this=f"{user}@{host}") 1269 1270 def _parse_withjournaltable(self) -> exp.Expression: 1271 self._match(TokenType.TABLE) 1272 self._match(TokenType.EQ) 1273 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1274 1275 def _parse_log(self, no=False) -> exp.Expression: 1276 self._match_text_seq("LOG") 1277 return self.expression(exp.LogProperty, no=no) 1278 1279 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1280 before = self._match_text_seq("BEFORE") 1281 self._match_text_seq("JOURNAL") 1282 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1283 1284 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1285 self._match_text_seq("NOT") 1286 self._match_text_seq("LOCAL") 1287 self._match_text_seq("AFTER", "JOURNAL") 1288 return 
self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1289 1290 def _parse_checksum(self) -> exp.Expression: 1291 self._match_text_seq("CHECKSUM") 1292 self._match(TokenType.EQ) 1293 1294 on = None 1295 if self._match(TokenType.ON): 1296 on = True 1297 elif self._match_text_seq("OFF"): 1298 on = False 1299 default = self._match(TokenType.DEFAULT) 1300 1301 return self.expression( 1302 exp.ChecksumProperty, 1303 on=on, 1304 default=default, 1305 ) 1306 1307 def _parse_freespace(self) -> exp.Expression: 1308 self._match_text_seq("FREESPACE") 1309 self._match(TokenType.EQ) 1310 return self.expression( 1311 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1312 ) 1313 1314 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1315 self._match_text_seq("MERGEBLOCKRATIO") 1316 if self._match(TokenType.EQ): 1317 return self.expression( 1318 exp.MergeBlockRatioProperty, 1319 this=self._parse_number(), 1320 percent=self._match(TokenType.PERCENT), 1321 ) 1322 else: 1323 return self.expression( 1324 exp.MergeBlockRatioProperty, 1325 no=no, 1326 default=default, 1327 ) 1328 1329 def _parse_datablocksize(self, default=None) -> exp.Expression: 1330 if default: 1331 self._match_text_seq("DATABLOCKSIZE") 1332 return self.expression(exp.DataBlocksizeProperty, default=True) 1333 elif self._match_texts(("MIN", "MINIMUM")): 1334 self._match_text_seq("DATABLOCKSIZE") 1335 return self.expression(exp.DataBlocksizeProperty, min=True) 1336 elif self._match_texts(("MAX", "MAXIMUM")): 1337 self._match_text_seq("DATABLOCKSIZE") 1338 return self.expression(exp.DataBlocksizeProperty, min=False) 1339 1340 self._match_text_seq("DATABLOCKSIZE") 1341 self._match(TokenType.EQ) 1342 size = self._parse_number() 1343 units = None 1344 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1345 units = self._prev.text 1346 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1347 1348 def 
_parse_blockcompression(self) -> exp.Expression: 1349 self._match_text_seq("BLOCKCOMPRESSION") 1350 self._match(TokenType.EQ) 1351 always = self._match_text_seq("ALWAYS") 1352 manual = self._match_text_seq("MANUAL") 1353 never = self._match_text_seq("NEVER") 1354 default = self._match_text_seq("DEFAULT") 1355 autotemp = None 1356 if self._match_text_seq("AUTOTEMP"): 1357 autotemp = self._parse_schema() 1358 1359 return self.expression( 1360 exp.BlockCompressionProperty, 1361 always=always, 1362 manual=manual, 1363 never=never, 1364 default=default, 1365 autotemp=autotemp, 1366 ) 1367 1368 def _parse_withisolatedloading(self) -> exp.Expression: 1369 no = self._match_text_seq("NO") 1370 concurrent = self._match_text_seq("CONCURRENT") 1371 self._match_text_seq("ISOLATED", "LOADING") 1372 for_all = self._match_text_seq("FOR", "ALL") 1373 for_insert = self._match_text_seq("FOR", "INSERT") 1374 for_none = self._match_text_seq("FOR", "NONE") 1375 return self.expression( 1376 exp.IsolatedLoadingProperty, 1377 no=no, 1378 concurrent=concurrent, 1379 for_all=for_all, 1380 for_insert=for_insert, 1381 for_none=for_none, 1382 ) 1383 1384 def _parse_locking(self) -> exp.Expression: 1385 if self._match(TokenType.TABLE): 1386 kind = "TABLE" 1387 elif self._match(TokenType.VIEW): 1388 kind = "VIEW" 1389 elif self._match(TokenType.ROW): 1390 kind = "ROW" 1391 elif self._match_text_seq("DATABASE"): 1392 kind = "DATABASE" 1393 else: 1394 kind = None 1395 1396 if kind in ("DATABASE", "TABLE", "VIEW"): 1397 this = self._parse_table_parts() 1398 else: 1399 this = None 1400 1401 if self._match(TokenType.FOR): 1402 for_or_in = "FOR" 1403 elif self._match(TokenType.IN): 1404 for_or_in = "IN" 1405 else: 1406 for_or_in = None 1407 1408 if self._match_text_seq("ACCESS"): 1409 lock_type = "ACCESS" 1410 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1411 lock_type = "EXCLUSIVE" 1412 elif self._match_text_seq("SHARE"): 1413 lock_type = "SHARE" 1414 elif self._match_text_seq("READ"): 1415 
lock_type = "READ" 1416 elif self._match_text_seq("WRITE"): 1417 lock_type = "WRITE" 1418 elif self._match_text_seq("CHECKSUM"): 1419 lock_type = "CHECKSUM" 1420 else: 1421 lock_type = None 1422 1423 override = self._match_text_seq("OVERRIDE") 1424 1425 return self.expression( 1426 exp.LockingProperty, 1427 this=this, 1428 kind=kind, 1429 for_or_in=for_or_in, 1430 lock_type=lock_type, 1431 override=override, 1432 ) 1433 1434 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1435 if self._match(TokenType.PARTITION_BY): 1436 return self._parse_csv(self._parse_conjunction) 1437 return [] 1438 1439 def _parse_partitioned_by(self) -> exp.Expression: 1440 self._match(TokenType.EQ) 1441 return self.expression( 1442 exp.PartitionedByProperty, 1443 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1444 ) 1445 1446 def _parse_withdata(self, no=False) -> exp.Expression: 1447 if self._match_text_seq("AND", "STATISTICS"): 1448 statistics = True 1449 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1450 statistics = False 1451 else: 1452 statistics = None 1453 1454 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1455 1456 def _parse_noprimaryindex(self) -> exp.Expression: 1457 self._match_text_seq("PRIMARY", "INDEX") 1458 return exp.NoPrimaryIndexProperty() 1459 1460 def _parse_oncommit(self) -> exp.Expression: 1461 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1462 return exp.OnCommitProperty() 1463 1464 def _parse_distkey(self) -> exp.Expression: 1465 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1466 1467 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1468 table = self._parse_table(schema=True) 1469 options = [] 1470 while self._match_texts(("INCLUDING", "EXCLUDING")): 1471 this = self._prev.text.upper() 1472 id_var = self._parse_id_var() 1473 1474 if not id_var: 1475 return None 1476 1477 options.append( 1478 self.expression( 1479 
exp.Property, 1480 this=this, 1481 value=exp.Var(this=id_var.this.upper()), 1482 ) 1483 ) 1484 return self.expression(exp.LikeProperty, this=table, expressions=options) 1485 1486 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1487 return self.expression( 1488 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1489 ) 1490 1491 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1492 self._match(TokenType.EQ) 1493 return self.expression( 1494 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1495 ) 1496 1497 def _parse_returns(self) -> exp.Expression: 1498 value: t.Optional[exp.Expression] 1499 is_table = self._match(TokenType.TABLE) 1500 1501 if is_table: 1502 if self._match(TokenType.LT): 1503 value = self.expression( 1504 exp.Schema, 1505 this="TABLE", 1506 expressions=self._parse_csv(self._parse_struct_kwargs), 1507 ) 1508 if not self._match(TokenType.GT): 1509 self.raise_error("Expecting >") 1510 else: 1511 value = self._parse_schema(exp.Var(this="TABLE")) 1512 else: 1513 value = self._parse_types() 1514 1515 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1516 1517 def _parse_temporary(self, global_=False) -> exp.Expression: 1518 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1519 return self.expression(exp.TemporaryProperty, global_=global_) 1520 1521 def _parse_describe(self) -> exp.Expression: 1522 kind = self._match_set(self.CREATABLES) and self._prev.text 1523 this = self._parse_table() 1524 1525 return self.expression(exp.Describe, this=this, kind=kind) 1526 1527 def _parse_insert(self) -> exp.Expression: 1528 overwrite = self._match(TokenType.OVERWRITE) 1529 local = self._match(TokenType.LOCAL) 1530 alternative = None 1531 1532 if self._match_text_seq("DIRECTORY"): 1533 this: t.Optional[exp.Expression] = self.expression( 1534 exp.Directory, 1535 this=self._parse_var_or_string(), 1536 local=local, 1537 
row_format=self._parse_row_format(match_row=True), 1538 ) 1539 else: 1540 if self._match(TokenType.OR): 1541 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1542 1543 self._match(TokenType.INTO) 1544 self._match(TokenType.TABLE) 1545 this = self._parse_table(schema=True) 1546 1547 return self.expression( 1548 exp.Insert, 1549 this=this, 1550 exists=self._parse_exists(), 1551 partition=self._parse_partition(), 1552 expression=self._parse_ddl_select(), 1553 returning=self._parse_returning(), 1554 overwrite=overwrite, 1555 alternative=alternative, 1556 ) 1557 1558 def _parse_returning(self) -> t.Optional[exp.Expression]: 1559 if not self._match(TokenType.RETURNING): 1560 return None 1561 1562 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1563 1564 def _parse_row(self) -> t.Optional[exp.Expression]: 1565 if not self._match(TokenType.FORMAT): 1566 return None 1567 return self._parse_row_format() 1568 1569 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1570 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1571 return None 1572 1573 if self._match_text_seq("SERDE"): 1574 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1575 1576 self._match_text_seq("DELIMITED") 1577 1578 kwargs = {} 1579 1580 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1581 kwargs["fields"] = self._parse_string() 1582 if self._match_text_seq("ESCAPED", "BY"): 1583 kwargs["escaped"] = self._parse_string() 1584 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1585 kwargs["collection_items"] = self._parse_string() 1586 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1587 kwargs["map_keys"] = self._parse_string() 1588 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1589 kwargs["lines"] = self._parse_string() 1590 if self._match_text_seq("NULL", "DEFINED", "AS"): 1591 kwargs["null"] = self._parse_string() 
1592 1593 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1594 1595 def _parse_load_data(self) -> exp.Expression: 1596 local = self._match(TokenType.LOCAL) 1597 self._match_text_seq("INPATH") 1598 inpath = self._parse_string() 1599 overwrite = self._match(TokenType.OVERWRITE) 1600 self._match_pair(TokenType.INTO, TokenType.TABLE) 1601 1602 return self.expression( 1603 exp.LoadData, 1604 this=self._parse_table(schema=True), 1605 local=local, 1606 overwrite=overwrite, 1607 inpath=inpath, 1608 partition=self._parse_partition(), 1609 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1610 serde=self._match_text_seq("SERDE") and self._parse_string(), 1611 ) 1612 1613 def _parse_delete(self) -> exp.Expression: 1614 self._match(TokenType.FROM) 1615 1616 return self.expression( 1617 exp.Delete, 1618 this=self._parse_table(schema=True), 1619 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1620 where=self._parse_where(), 1621 returning=self._parse_returning(), 1622 ) 1623 1624 def _parse_update(self) -> exp.Expression: 1625 return self.expression( 1626 exp.Update, 1627 **{ # type: ignore 1628 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1629 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1630 "from": self._parse_from(), 1631 "where": self._parse_where(), 1632 "returning": self._parse_returning(), 1633 }, 1634 ) 1635 1636 def _parse_uncache(self) -> exp.Expression: 1637 if not self._match(TokenType.TABLE): 1638 self.raise_error("Expecting TABLE after UNCACHE") 1639 1640 return self.expression( 1641 exp.Uncache, 1642 exists=self._parse_exists(), 1643 this=self._parse_table(schema=True), 1644 ) 1645 1646 def _parse_cache(self) -> exp.Expression: 1647 lazy = self._match(TokenType.LAZY) 1648 self._match(TokenType.TABLE) 1649 table = self._parse_table(schema=True) 1650 options = [] 1651 1652 if self._match(TokenType.OPTIONS): 1653 
            # Single OPTIONS('key' = 'value') pair, captured as [key, value].
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        # PARTITION (<expr>, ...); None when the keyword is absent.
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        # One VALUES row: a parenthesized tuple or a single bare expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH/CTEs, a SELECT list, a parenthesized
        subquery, or a VALUES clause.

        Args:
            nested: whether this call is parsing a parenthesized subquery.
            table: whether a bare table reference is acceptable inside parentheses.
            parse_subquery_alias: whether to parse a trailing subquery alias.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be non-fatal (depending on error_level), so a
                # fallback return value is still provided.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse WITH [RECURSIVE] <cte>, ... into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs may be separated by commas or (leniently) repeated WITHs.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
self.raise_error("Expected CTE to have alias") 1782 1783 self._match(TokenType.ALIAS) 1784 1785 return self.expression( 1786 exp.CTE, 1787 this=self._parse_wrapped(self._parse_statement), 1788 alias=alias, 1789 ) 1790 1791 def _parse_table_alias( 1792 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1793 ) -> t.Optional[exp.Expression]: 1794 any_token = self._match(TokenType.ALIAS) 1795 alias = ( 1796 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1797 or self._parse_string_as_identifier() 1798 ) 1799 1800 index = self._index 1801 if self._match(TokenType.L_PAREN): 1802 columns = self._parse_csv(self._parse_function_parameter) 1803 self._match_r_paren() if columns else self._retreat(index) 1804 else: 1805 columns = None 1806 1807 if not alias and not columns: 1808 return None 1809 1810 return self.expression(exp.TableAlias, this=alias, columns=columns) 1811 1812 def _parse_subquery( 1813 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1814 ) -> exp.Expression: 1815 return self.expression( 1816 exp.Subquery, 1817 this=this, 1818 pivots=self._parse_pivots(), 1819 alias=self._parse_table_alias() if parse_alias else None, 1820 ) 1821 1822 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1823 if not isinstance(this, self.MODIFIABLES): 1824 return 1825 1826 table = isinstance(this, exp.Table) 1827 1828 while True: 1829 lateral = self._parse_lateral() 1830 join = self._parse_join() 1831 comma = None if table else self._match(TokenType.COMMA) 1832 if lateral: 1833 this.append("laterals", lateral) 1834 if join: 1835 this.append("joins", join) 1836 if comma: 1837 this.args["from"].append("expressions", self._parse_table()) 1838 if not (lateral or join or comma): 1839 break 1840 1841 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1842 expression = parser(self) 1843 1844 if expression: 1845 this.set(key, expression) 1846 1847 def _parse_hint(self) -> 
t.Optional[exp.Expression]: 1848 if self._match(TokenType.HINT): 1849 hints = self._parse_csv(self._parse_function) 1850 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1851 self.raise_error("Expected */ after HINT") 1852 return self.expression(exp.Hint, expressions=hints) 1853 1854 return None 1855 1856 def _parse_into(self) -> t.Optional[exp.Expression]: 1857 if not self._match(TokenType.INTO): 1858 return None 1859 1860 temp = self._match(TokenType.TEMPORARY) 1861 unlogged = self._match(TokenType.UNLOGGED) 1862 self._match(TokenType.TABLE) 1863 1864 return self.expression( 1865 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1866 ) 1867 1868 def _parse_from(self) -> t.Optional[exp.Expression]: 1869 if not self._match(TokenType.FROM): 1870 return None 1871 1872 return self.expression( 1873 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1874 ) 1875 1876 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1877 if not self._match(TokenType.MATCH_RECOGNIZE): 1878 return None 1879 self._match_l_paren() 1880 1881 partition = self._parse_partition_by() 1882 order = self._parse_order() 1883 measures = ( 1884 self._parse_alias(self._parse_conjunction()) 1885 if self._match_text_seq("MEASURES") 1886 else None 1887 ) 1888 1889 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1890 rows = exp.Var(this="ONE ROW PER MATCH") 1891 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1892 text = "ALL ROWS PER MATCH" 1893 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1894 text += f" SHOW EMPTY MATCHES" 1895 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1896 text += f" OMIT EMPTY MATCHES" 1897 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1898 text += f" WITH UNMATCHED ROWS" 1899 rows = exp.Var(this=text) 1900 else: 1901 rows = None 1902 1903 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1904 text = "AFTER MATCH SKIP" 1905 if 
self._match_text_seq("PAST", "LAST", "ROW"): 1906 text += f" PAST LAST ROW" 1907 elif self._match_text_seq("TO", "NEXT", "ROW"): 1908 text += f" TO NEXT ROW" 1909 elif self._match_text_seq("TO", "FIRST"): 1910 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1911 elif self._match_text_seq("TO", "LAST"): 1912 text += f" TO LAST {self._advance_any().text}" # type: ignore 1913 after = exp.Var(this=text) 1914 else: 1915 after = None 1916 1917 if self._match_text_seq("PATTERN"): 1918 self._match_l_paren() 1919 1920 if not self._curr: 1921 self.raise_error("Expecting )", self._curr) 1922 1923 paren = 1 1924 start = self._curr 1925 1926 while self._curr and paren > 0: 1927 if self._curr.token_type == TokenType.L_PAREN: 1928 paren += 1 1929 if self._curr.token_type == TokenType.R_PAREN: 1930 paren -= 1 1931 end = self._prev 1932 self._advance() 1933 if paren > 0: 1934 self.raise_error("Expecting )", self._curr) 1935 pattern = exp.Var(this=self._find_sql(start, end)) 1936 else: 1937 pattern = None 1938 1939 define = ( 1940 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1941 ) 1942 self._match_r_paren() 1943 1944 return self.expression( 1945 exp.MatchRecognize, 1946 partition_by=partition, 1947 order=order, 1948 measures=measures, 1949 rows=rows, 1950 after=after, 1951 pattern=pattern, 1952 define=define, 1953 ) 1954 1955 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1956 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1957 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1958 1959 if outer_apply or cross_apply: 1960 this = self._parse_select(table=True) 1961 view = None 1962 outer = not cross_apply 1963 elif self._match(TokenType.LATERAL): 1964 this = self._parse_select(table=True) 1965 view = self._match(TokenType.VIEW) 1966 outer = self._match(TokenType.OUTER) 1967 else: 1968 return None 1969 1970 if not this: 1971 this = self._parse_function() or 
self._parse_id_var(any_token=False) 1972 while self._match(TokenType.DOT): 1973 this = exp.Dot( 1974 this=this, 1975 expression=self._parse_function() or self._parse_id_var(any_token=False), 1976 ) 1977 1978 table_alias: t.Optional[exp.Expression] 1979 1980 if view: 1981 table = self._parse_id_var(any_token=False) 1982 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1983 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1984 else: 1985 table_alias = self._parse_table_alias() 1986 1987 expression = self.expression( 1988 exp.Lateral, 1989 this=this, 1990 view=view, 1991 outer=outer, 1992 alias=table_alias, 1993 ) 1994 1995 if outer_apply or cross_apply: 1996 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1997 1998 return expression 1999 2000 def _parse_join_side_and_kind( 2001 self, 2002 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2003 return ( 2004 self._match(TokenType.NATURAL) and self._prev, 2005 self._match_set(self.JOIN_SIDES) and self._prev, 2006 self._match_set(self.JOIN_KINDS) and self._prev, 2007 ) 2008 2009 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2010 natural, side, kind = self._parse_join_side_and_kind() 2011 2012 if not skip_join_token and not self._match(TokenType.JOIN): 2013 return None 2014 2015 kwargs: t.Dict[ 2016 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2017 ] = {"this": self._parse_table()} 2018 2019 if natural: 2020 kwargs["natural"] = True 2021 if side: 2022 kwargs["side"] = side.text 2023 if kind: 2024 kwargs["kind"] = kind.text 2025 2026 if self._match(TokenType.ON): 2027 kwargs["on"] = self._parse_conjunction() 2028 elif self._match(TokenType.USING): 2029 kwargs["using"] = self._parse_wrapped_id_vars() 2030 2031 return self.expression(exp.Join, **kwargs) # type: ignore 2032 2033 def _parse_index(self) -> exp.Expression: 2034 index 
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an index definition inside CREATE TABLE ([UNIQUE] [PRIMARY] [AMP] INDEX ...)."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")  # Teradata access module processor index
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly dotted table reference ([catalog.][db.]table) into exp.Table."""
        catalog = None
        db = None

        table = (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
        )

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift parts left: previous table becomes db, previous db becomes catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery or plain table."""
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place the alias after TABLESAMPLE rather than before it.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (...) table hints, e.g. T-SQL's WITH (NOLOCK).
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        # The sample wraps the table expression when present.
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS x]]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        # BigQuery-style: a single alias names the produced column, not the table.
        if alias and self.unnest_column_only:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
_parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2175 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2176 if not is_derived and not self._match(TokenType.VALUES): 2177 return None 2178 2179 expressions = self._parse_csv(self._parse_value) 2180 2181 if is_derived: 2182 self._match_r_paren() 2183 2184 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2185 2186 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2187 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2188 as_modifier and self._match_text_seq("USING", "SAMPLE") 2189 ): 2190 return None 2191 2192 bucket_numerator = None 2193 bucket_denominator = None 2194 bucket_field = None 2195 percent = None 2196 rows = None 2197 size = None 2198 seed = None 2199 2200 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2201 method = self._parse_var(tokens=(TokenType.ROW,)) 2202 2203 self._match(TokenType.L_PAREN) 2204 2205 num = self._parse_number() 2206 2207 if self._match(TokenType.BUCKET): 2208 bucket_numerator = self._parse_number() 2209 self._match(TokenType.OUT_OF) 2210 bucket_denominator = bucket_denominator = self._parse_number() 2211 self._match(TokenType.ON) 2212 bucket_field = self._parse_field() 2213 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2214 percent = num 2215 elif self._match(TokenType.ROWS): 2216 rows = num 2217 else: 2218 size = num 2219 2220 self._match(TokenType.R_PAREN) 2221 2222 if self._match(TokenType.L_PAREN): 2223 method = self._parse_var() 2224 seed = self._match(TokenType.COMMA) and self._parse_number() 2225 self._match_r_paren() 2226 elif self._match_texts(("SEED", "REPEATABLE")): 2227 seed = self._parse_wrapped(self._parse_number) 2228 2229 return self.expression( 2230 exp.TableSample, 2231 method=method, 2232 bucket_numerator=bucket_numerator, 2233 bucket_denominator=bucket_denominator, 2234 
    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        # iter(callable, sentinel): keep parsing pivots until one returns None.
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT/UNPIVOT clause; backtracks and returns None if it doesn't parse."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # PIVOT takes aggregate expressions, UNPIVOT plain columns.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP and CUBE."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop because the grouping constructs may appear repeatedly, in any order.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)
            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes a wrapped one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            # Stop once a full pass matched nothing new.
            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Shared parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses.
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term, normalizing NULLS FIRST/LAST per dialect defaults."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # When null ordering wasn't explicit, mark nulls_first if the configured
        # dialect default would already put nulls first for this sort direction.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or the ANSI FETCH {FIRST|NEXT} ... clause."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2414 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2415 return this 2416 2417 count = self._parse_number() 2418 self._match_set((TokenType.ROW, TokenType.ROWS)) 2419 return self.expression(exp.Offset, this=this, expression=count) 2420 2421 def _parse_lock(self) -> t.Optional[exp.Expression]: 2422 if self._match_text_seq("FOR", "UPDATE"): 2423 return self.expression(exp.Lock, update=True) 2424 if self._match_text_seq("FOR", "SHARE"): 2425 return self.expression(exp.Lock, update=False) 2426 2427 return None 2428 2429 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2430 if not self._match_set(self.SET_OPERATIONS): 2431 return this 2432 2433 token_type = self._prev.token_type 2434 2435 if token_type == TokenType.UNION: 2436 expression = exp.Union 2437 elif token_type == TokenType.EXCEPT: 2438 expression = exp.Except 2439 else: 2440 expression = exp.Intersect 2441 2442 return self.expression( 2443 expression, 2444 this=this, 2445 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2446 expression=self._parse_set_operations(self._parse_select(nested=True)), 2447 ) 2448 2449 def _parse_expression(self) -> t.Optional[exp.Expression]: 2450 return self._parse_alias(self._parse_conjunction()) 2451 2452 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2453 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2454 2455 def _parse_equality(self) -> t.Optional[exp.Expression]: 2456 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2457 2458 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2459 return self._parse_tokens(self._parse_range, self.COMPARISON) 2460 2461 def _parse_range(self) -> t.Optional[exp.Expression]: 2462 this = self._parse_bitwise() 2463 negate = self._match(TokenType.NOT) 2464 2465 if self._match_set(self.RANGE_PARSERS): 2466 this = 
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the remainder of an IS predicate (after IS was already consumed)."""
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS [NOT] DISTINCT FROM is a null-safe (in)equality comparison.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IN: UNNEST, subquery, value list, or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A lone subquery is stored in `query`; otherwise keep the expression list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)
    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, including shifts tokenized as < < / > >."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse a type expression, disambiguating casts like `DATE '2020-01-01'` from columns."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # <type> <literal> is a cast, e.g. DATE '2020-01-01'.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare type followed by something else: reparse it as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token
        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, backtracking and returning None when no type is present.

        Args:
            check_func: when True, bail out if the tokens could instead be a
                function-style call (a type name directly followed by a string).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")  # Teradata UDT prefix

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            # A parenthesized argument list means this might be a function call instead.
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # An opening bracket with content means this was an index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Hive/Spark-style nested types: ARRAY<INT>, STRUCT<a: INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # A resolved timestamp type can no longer be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string, so this really was a function call; undo everything.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: either a bare type or `name [:] type`."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)
    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted and/or bracket-indexed) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # JSON-ish operators take a literal key on the right-hand side.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifier parts: column becomes table, table becomes db, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, implicit string concat, or a parenthesized form."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # A leading dot before a number is a decimal literal, e.g. `.5`.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            # Decide between subquery, tuple and a plain parenthesized expression.
            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                this.comments = comments

            return this

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered dialect parsers where available."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis ahead: only parenless builtins like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: represent it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        # Charset introducer such as _utf8'abc'; fall back to a plain identifier.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        # National character literal, e.g. N'abc'.
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        # Parse a lambda ((a, b) -> expr), a DISTINCT argument list, or a plain expression.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: backtrack and parse a regular function argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        # Arguments may carry their own ORDER BY / LIMIT (e.g. aggregate modifiers).
        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list following a column name."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Without a type or constraints there is no column definition to wrap.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        # Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args.
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # With both values this is really an identity definition.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        # COMPRESS constraint: either a parenthesized value list or a single value.
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)]."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Optional sequence options, each independently optional.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        # Teradata INLINE LENGTH n column constraint.
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of a NOT constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column-level constraint, optionally named via CONSTRAINT."""
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()  # the constraint's name

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly CONSTRAINT-named) table-level constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several bodies (or function-like forms).
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the matching CONSTRAINT_PARSERS entry, if any."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE as a column constraint or UNIQUE (cols) table constraint."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings.

        Recognizes ON <event> <action>, NOT ENFORCED, DEFERRABLE,
        INITIALLY DEFERRED, NORELY and MATCH FULL; stops at anything else.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event word (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES table [(cols)] [options...]; None when absent."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE action]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY as a column constraint or PRIMARY KEY (cols) table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts/array literals and {...} struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading ":" means an open-ended slice, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts to the dialect's array index base.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` into a Slice when a ':' follows (array slicing)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()  # optional CASE operand

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # CASE may itself be windowed, e.g. "CASE ... END OVER (...)".
        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both function form IF(a, b, c) and IF ... THEN ... END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CAST(x AS CHAR CHARACTER SET cs)
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, incl. the WITHIN GROUP form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or LOCATE-style argument lists.

        `haystack_first` controls positional-argument order for dialects whose
        locate function takes the haystack before the needle.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this
    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_term()
        else:
            # No FROM/comma: the first term was the trim target itself.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE NULLS / OVER decorations on `this`.

        With alias=True, parse a named window definition (WINDOW clause entry)
        instead of requiring OVER.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr [PRECEDING|FOLLOWING]."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias or alias list "(a, b)" after `this`.

        With explicit=True, only aliases introduced by AS are accepted.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token into an Identifier node.

        With any_token=True, any non-reserved token qualifies; otherwise only
        `tokens` (or the dialect's ID_VAR_TOKENS) are accepted.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()
    def _advance_any(self) -> t.Optional[Token]:
        """Advance past the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a var, else a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal via the registered primary parser."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal via the registered primary parsers."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a "*" token via the registered primary parser."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter marker, optionally brace-wrapped (e.g. ${name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind one token on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (col, ...) projection modifier; None when absent."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr, ...) projection modifier; None when absent."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a separator-delimited list, attaching comments found at separators."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse "(id, id, ...)"."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized separator-delimited list."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Parse "( <parse_method> )", raising when a paren is missing."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, else a plain expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT (or set operation) used inside DDL, e.g. CTAS."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode [, mode ...]]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens, e.g. "ISOLATION LEVEL ...".
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
if self._match_texts(self.TRANSACTION_KIND): 3666 this = self._prev.text 3667 3668 self._match_texts({"TRANSACTION", "WORK"}) 3669 3670 modes = [] 3671 while True: 3672 mode = [] 3673 while self._match(TokenType.VAR): 3674 mode.append(self._prev.text) 3675 3676 if mode: 3677 modes.append(" ".join(mode)) 3678 if not self._match(TokenType.COMMA): 3679 break 3680 3681 return self.expression(exp.Transaction, this=this, modes=modes) 3682 3683 def _parse_commit_or_rollback(self) -> exp.Expression: 3684 chain = None 3685 savepoint = None 3686 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3687 3688 self._match_texts({"TRANSACTION", "WORK"}) 3689 3690 if self._match_text_seq("TO"): 3691 self._match_text_seq("SAVEPOINT") 3692 savepoint = self._parse_id_var() 3693 3694 if self._match(TokenType.AND): 3695 chain = not self._match_text_seq("NO") 3696 self._match_text_seq("CHAIN") 3697 3698 if is_rollback: 3699 return self.expression(exp.Rollback, savepoint=savepoint) 3700 return self.expression(exp.Commit, chain=chain) 3701 3702 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3703 if not self._match_text_seq("ADD"): 3704 return None 3705 3706 self._match(TokenType.COLUMN) 3707 exists_column = self._parse_exists(not_=True) 3708 expression = self._parse_column_def(self._parse_field(any_token=True)) 3709 3710 if expression: 3711 expression.set("exists", exists_column) 3712 3713 return expression 3714 3715 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3716 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3717 3718 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3719 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3720 return self.expression( 3721 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3722 ) 3723 3724 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3725 this = None 3726 kind = 
    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... ADD: constraints if one follows, else columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint -- rewind (including the ADD keyword) and parse columns.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT v | [SET DATA] TYPE t ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP: partitions if PARTITION follows, else columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)
    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE ... RENAME [TO] new_name."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; unrecognized forms degrade to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The action keyword (ADD/ALTER/DROP/RENAME/...) selects the sub-parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=ensure_list(parser(self)),
            )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source=False marks BY TARGET, source=True marks BY SOURCE.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via dialect-specific parsers, else a generic Show node."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment `name {= | TO} value`; rewinds when it isn't one."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic [, ...]>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via dialect parsers, else as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse SET ...; falls back to a raw Command when tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of the (possibly multi-word) `options` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into a Command node.

        `this` holds the leading keyword (sized from `start`), `expression`
        the remaining raw SQL text.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to resolve a (multi-word) parser key.

        Rewinds the token cursor when no key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True):
        # True when the current token has `token_type`; else None (falsy).
        # With advance=False this is a pure lookahead.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None
True 3990 3991 return None 3992 3993 def _match_pair(self, token_type_a, token_type_b, advance=True): 3994 if not self._curr or not self._next: 3995 return None 3996 3997 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3998 if advance: 3999 self._advance(2) 4000 return True 4001 4002 return None 4003 4004 def _match_l_paren(self, expression=None): 4005 if not self._match(TokenType.L_PAREN): 4006 self.raise_error("Expecting (") 4007 if expression and self._prev_comments: 4008 expression.comments = self._prev_comments 4009 4010 def _match_r_paren(self, expression=None): 4011 if not self._match(TokenType.R_PAREN): 4012 self.raise_error("Expecting )") 4013 if expression and self._prev_comments: 4014 expression.comments = self._prev_comments 4015 4016 def _match_texts(self, texts, advance=True): 4017 if self._curr and self._curr.text.upper() in texts: 4018 if advance: 4019 self._advance() 4020 return True 4021 return False 4022 4023 def _match_text_seq(self, *texts, advance=True): 4024 index = self._index 4025 for text in texts: 4026 if self._curr and self._curr.text.upper() == text: 4027 self._advance() 4028 else: 4029 self._retreat(index) 4030 return False 4031 4032 if not advance: 4033 self._retreat(index) 4034 4035 return True 4036 4037 def _replace_columns_with_dots(self, this): 4038 if isinstance(this, exp.Dot): 4039 exp.replace_children(this, self._replace_columns_with_dots) 4040 elif isinstance(this, exp.Column): 4041 exp.replace_children(this, self._replace_columns_with_dots) 4042 table = this.args.get("table") 4043 this = ( 4044 self.expression(exp.Dot, this=table, expression=this.this) 4045 if table 4046 else self.expression(exp.Var, this=this.name) 4047 ) 4048 elif isinstance(this, exp.Identifier): 4049 this = self.expression(exp.Var, this=this.name) 4050 return this 4051 4052 def _replace_lambda(self, node, lambda_variables): 4053 if isinstance(node, exp.Column): 4054 if node.name in lambda_variables: 4055 return 
node.this 4056 return node
52class Parser(metaclass=_Parser): 53 """ 54 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 55 a parsed syntax tree. 56 57 Args: 58 error_level: the desired error level. 59 Default: ErrorLevel.RAISE 60 error_message_context: determines the amount of context to capture from a 61 query string when displaying the error message (in number of characters). 62 Default: 50. 63 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 64 Default: 0 65 alias_post_tablesample: If the table alias comes after tablesample. 66 Default: False 67 max_errors: Maximum number of error messages to include in a raised ParseError. 68 This is only relevant if error_level is ErrorLevel.RAISE. 69 Default: 3 70 null_ordering: Indicates the default null ordering method to use if not explicitly set. 71 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 72 Default: "nulls_are_small" 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "TIME_TO_TIME_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 86 this=exp.Cast( 87 this=seq_get(args, 0), 88 to=exp.DataType(this=exp.DataType.Type.TEXT), 89 ), 90 start=exp.Literal.number(1), 91 length=exp.Literal.number(10), 92 ), 93 "VAR_MAP": parse_var_map, 94 "IFNULL": exp.Coalesce.from_arg_list, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 } 103 104 NESTED_TYPE_TOKENS = { 105 TokenType.ARRAY, 106 TokenType.MAP, 107 TokenType.STRUCT, 108 TokenType.NULLABLE, 109 } 110 111 
TYPE_TOKENS = { 112 TokenType.BIT, 113 TokenType.BOOLEAN, 114 TokenType.TINYINT, 115 TokenType.SMALLINT, 116 TokenType.INT, 117 TokenType.BIGINT, 118 TokenType.FLOAT, 119 TokenType.DOUBLE, 120 TokenType.CHAR, 121 TokenType.NCHAR, 122 TokenType.VARCHAR, 123 TokenType.NVARCHAR, 124 TokenType.TEXT, 125 TokenType.MEDIUMTEXT, 126 TokenType.LONGTEXT, 127 TokenType.MEDIUMBLOB, 128 TokenType.LONGBLOB, 129 TokenType.BINARY, 130 TokenType.VARBINARY, 131 TokenType.JSON, 132 TokenType.JSONB, 133 TokenType.INTERVAL, 134 TokenType.TIME, 135 TokenType.TIMESTAMP, 136 TokenType.TIMESTAMPTZ, 137 TokenType.TIMESTAMPLTZ, 138 TokenType.DATETIME, 139 TokenType.DATE, 140 TokenType.DECIMAL, 141 TokenType.UUID, 142 TokenType.GEOGRAPHY, 143 TokenType.GEOMETRY, 144 TokenType.HLLSKETCH, 145 TokenType.HSTORE, 146 TokenType.PSEUDO_TYPE, 147 TokenType.SUPER, 148 TokenType.SERIAL, 149 TokenType.SMALLSERIAL, 150 TokenType.BIGSERIAL, 151 TokenType.XML, 152 TokenType.UNIQUEIDENTIFIER, 153 TokenType.MONEY, 154 TokenType.SMALLMONEY, 155 TokenType.ROWVERSION, 156 TokenType.IMAGE, 157 TokenType.VARIANT, 158 TokenType.OBJECT, 159 TokenType.INET, 160 *NESTED_TYPE_TOKENS, 161 } 162 163 SUBQUERY_PREDICATES = { 164 TokenType.ANY: exp.Any, 165 TokenType.ALL: exp.All, 166 TokenType.EXISTS: exp.Exists, 167 TokenType.SOME: exp.Any, 168 } 169 170 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 171 172 DB_CREATABLES = { 173 TokenType.DATABASE, 174 TokenType.SCHEMA, 175 TokenType.TABLE, 176 TokenType.VIEW, 177 } 178 179 CREATABLES = { 180 TokenType.COLUMN, 181 TokenType.FUNCTION, 182 TokenType.INDEX, 183 TokenType.PROCEDURE, 184 *DB_CREATABLES, 185 } 186 187 ID_VAR_TOKENS = { 188 TokenType.VAR, 189 TokenType.ANTI, 190 TokenType.APPLY, 191 TokenType.AUTO_INCREMENT, 192 TokenType.BEGIN, 193 TokenType.BOTH, 194 TokenType.BUCKET, 195 TokenType.CACHE, 196 TokenType.CASCADE, 197 TokenType.COLLATE, 198 TokenType.COMMAND, 199 TokenType.COMMENT, 200 TokenType.COMMIT, 201 TokenType.COMPOUND, 202 
TokenType.CONSTRAINT, 203 TokenType.DEFAULT, 204 TokenType.DELETE, 205 TokenType.DESCRIBE, 206 TokenType.DIV, 207 TokenType.END, 208 TokenType.EXECUTE, 209 TokenType.ESCAPE, 210 TokenType.FALSE, 211 TokenType.FIRST, 212 TokenType.FILTER, 213 TokenType.FOLLOWING, 214 TokenType.FORMAT, 215 TokenType.IF, 216 TokenType.ISNULL, 217 TokenType.INTERVAL, 218 TokenType.LAZY, 219 TokenType.LEADING, 220 TokenType.LEFT, 221 TokenType.LOCAL, 222 TokenType.MATERIALIZED, 223 TokenType.MERGE, 224 TokenType.NATURAL, 225 TokenType.NEXT, 226 TokenType.OFFSET, 227 TokenType.ONLY, 228 TokenType.OPTIONS, 229 TokenType.ORDINALITY, 230 TokenType.PERCENT, 231 TokenType.PIVOT, 232 TokenType.PRECEDING, 233 TokenType.RANGE, 234 TokenType.REFERENCES, 235 TokenType.RIGHT, 236 TokenType.ROW, 237 TokenType.ROWS, 238 TokenType.SEED, 239 TokenType.SEMI, 240 TokenType.SET, 241 TokenType.SHOW, 242 TokenType.SORTKEY, 243 TokenType.TEMPORARY, 244 TokenType.TOP, 245 TokenType.TRAILING, 246 TokenType.TRUE, 247 TokenType.UNBOUNDED, 248 TokenType.UNIQUE, 249 TokenType.UNLOGGED, 250 TokenType.UNPIVOT, 251 TokenType.VOLATILE, 252 TokenType.WINDOW, 253 *CREATABLES, 254 *SUBQUERY_PREDICATES, 255 *TYPE_TOKENS, 256 *NO_PAREN_FUNCTIONS, 257 } 258 259 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 260 TokenType.APPLY, 261 TokenType.LEFT, 262 TokenType.NATURAL, 263 TokenType.OFFSET, 264 TokenType.RIGHT, 265 TokenType.WINDOW, 266 } 267 268 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 269 270 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 271 272 FUNC_TOKENS = { 273 TokenType.COMMAND, 274 TokenType.CURRENT_DATE, 275 TokenType.CURRENT_DATETIME, 276 TokenType.CURRENT_TIMESTAMP, 277 TokenType.CURRENT_TIME, 278 TokenType.FILTER, 279 TokenType.FIRST, 280 TokenType.FORMAT, 281 TokenType.IDENTIFIER, 282 TokenType.INDEX, 283 TokenType.ISNULL, 284 TokenType.ILIKE, 285 TokenType.LIKE, 286 TokenType.MERGE, 287 TokenType.OFFSET, 288 TokenType.PRIMARY_KEY, 289 TokenType.REPLACE, 290 TokenType.ROW, 291 
TokenType.UNNEST, 292 TokenType.VAR, 293 TokenType.LEFT, 294 TokenType.RIGHT, 295 TokenType.DATE, 296 TokenType.DATETIME, 297 TokenType.TABLE, 298 TokenType.TIMESTAMP, 299 TokenType.TIMESTAMPTZ, 300 TokenType.WINDOW, 301 *TYPE_TOKENS, 302 *SUBQUERY_PREDICATES, 303 } 304 305 CONJUNCTION = { 306 TokenType.AND: exp.And, 307 TokenType.OR: exp.Or, 308 } 309 310 EQUALITY = { 311 TokenType.EQ: exp.EQ, 312 TokenType.NEQ: exp.NEQ, 313 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 314 } 315 316 COMPARISON = { 317 TokenType.GT: exp.GT, 318 TokenType.GTE: exp.GTE, 319 TokenType.LT: exp.LT, 320 TokenType.LTE: exp.LTE, 321 } 322 323 BITWISE = { 324 TokenType.AMP: exp.BitwiseAnd, 325 TokenType.CARET: exp.BitwiseXor, 326 TokenType.PIPE: exp.BitwiseOr, 327 TokenType.DPIPE: exp.DPipe, 328 } 329 330 TERM = { 331 TokenType.DASH: exp.Sub, 332 TokenType.PLUS: exp.Add, 333 TokenType.MOD: exp.Mod, 334 TokenType.COLLATE: exp.Collate, 335 } 336 337 FACTOR = { 338 TokenType.DIV: exp.IntDiv, 339 TokenType.LR_ARROW: exp.Distance, 340 TokenType.SLASH: exp.Div, 341 TokenType.STAR: exp.Mul, 342 } 343 344 TIMESTAMPS = { 345 TokenType.TIME, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 } 350 351 SET_OPERATIONS = { 352 TokenType.UNION, 353 TokenType.INTERSECT, 354 TokenType.EXCEPT, 355 } 356 357 JOIN_SIDES = { 358 TokenType.LEFT, 359 TokenType.RIGHT, 360 TokenType.FULL, 361 } 362 363 JOIN_KINDS = { 364 TokenType.INNER, 365 TokenType.OUTER, 366 TokenType.CROSS, 367 TokenType.SEMI, 368 TokenType.ANTI, 369 } 370 371 LAMBDAS = { 372 TokenType.ARROW: lambda self, expressions: self.expression( 373 exp.Lambda, 374 this=self._parse_conjunction().transform( 375 self._replace_lambda, {node.name for node in expressions} 376 ), 377 expressions=expressions, 378 ), 379 TokenType.FARROW: lambda self, expressions: self.expression( 380 exp.Kwarg, 381 this=exp.Var(this=expressions[0].name), 382 expression=self._parse_conjunction(), 383 ), 384 } 385 386 COLUMN_OPERATORS = { 387 
TokenType.DOT: None, 388 TokenType.DCOLON: lambda self, this, to: self.expression( 389 exp.Cast, 390 this=this, 391 to=to, 392 ), 393 TokenType.ARROW: lambda self, this, path: self.expression( 394 exp.JSONExtract, 395 this=this, 396 expression=path, 397 ), 398 TokenType.DARROW: lambda self, this, path: self.expression( 399 exp.JSONExtractScalar, 400 this=this, 401 expression=path, 402 ), 403 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 404 exp.JSONBExtract, 405 this=this, 406 expression=path, 407 ), 408 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 409 exp.JSONBExtractScalar, 410 this=this, 411 expression=path, 412 ), 413 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 414 exp.JSONBContains, 415 this=this, 416 expression=key, 417 ), 418 } 419 420 EXPRESSION_PARSERS = { 421 exp.Column: lambda self: self._parse_column(), 422 exp.DataType: lambda self: self._parse_types(), 423 exp.From: lambda self: self._parse_from(), 424 exp.Group: lambda self: self._parse_group(), 425 exp.Identifier: lambda self: self._parse_id_var(), 426 exp.Lateral: lambda self: self._parse_lateral(), 427 exp.Join: lambda self: self._parse_join(), 428 exp.Order: lambda self: self._parse_order(), 429 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 430 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 431 exp.Lambda: lambda self: self._parse_lambda(), 432 exp.Limit: lambda self: self._parse_limit(), 433 exp.Offset: lambda self: self._parse_offset(), 434 exp.TableAlias: lambda self: self._parse_table_alias(), 435 exp.Table: lambda self: self._parse_table(), 436 exp.Condition: lambda self: self._parse_conjunction(), 437 exp.Expression: lambda self: self._parse_statement(), 438 exp.Properties: lambda self: self._parse_properties(), 439 exp.Where: lambda self: self._parse_where(), 440 exp.Ordered: lambda self: self._parse_ordered(), 441 exp.Having: lambda self: self._parse_having(), 442 exp.With: 
lambda self: self._parse_with(), 443 exp.Window: lambda self: self._parse_named_window(), 444 exp.Qualify: lambda self: self._parse_qualify(), 445 exp.Returning: lambda self: self._parse_returning(), 446 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 447 } 448 449 STATEMENT_PARSERS = { 450 TokenType.ALTER: lambda self: self._parse_alter(), 451 TokenType.BEGIN: lambda self: self._parse_transaction(), 452 TokenType.CACHE: lambda self: self._parse_cache(), 453 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 454 TokenType.COMMENT: lambda self: self._parse_comment(), 455 TokenType.CREATE: lambda self: self._parse_create(), 456 TokenType.DELETE: lambda self: self._parse_delete(), 457 TokenType.DESC: lambda self: self._parse_describe(), 458 TokenType.DESCRIBE: lambda self: self._parse_describe(), 459 TokenType.DROP: lambda self: self._parse_drop(), 460 TokenType.END: lambda self: self._parse_commit_or_rollback(), 461 TokenType.INSERT: lambda self: self._parse_insert(), 462 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 463 TokenType.MERGE: lambda self: self._parse_merge(), 464 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 465 TokenType.SET: lambda self: self._parse_set(), 466 TokenType.UNCACHE: lambda self: self._parse_uncache(), 467 TokenType.UPDATE: lambda self: self._parse_update(), 468 TokenType.USE: lambda self: self.expression( 469 exp.Use, 470 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 471 and exp.Var(this=self._prev.text), 472 this=self._parse_table(schema=False), 473 ), 474 } 475 476 UNARY_PARSERS = { 477 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 478 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 479 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 480 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 481 } 482 483 PRIMARY_PARSERS = 
{ 484 TokenType.STRING: lambda self, token: self.expression( 485 exp.Literal, this=token.text, is_string=True 486 ), 487 TokenType.NUMBER: lambda self, token: self.expression( 488 exp.Literal, this=token.text, is_string=False 489 ), 490 TokenType.STAR: lambda self, _: self.expression( 491 exp.Star, 492 **{"except": self._parse_except(), "replace": self._parse_replace()}, 493 ), 494 TokenType.NULL: lambda self, _: self.expression(exp.Null), 495 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 496 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 497 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 498 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 499 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 500 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 501 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 502 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 503 } 504 505 PLACEHOLDER_PARSERS = { 506 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 507 TokenType.PARAMETER: lambda self: self._parse_parameter(), 508 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 509 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 510 else None, 511 } 512 513 RANGE_PARSERS = { 514 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 515 TokenType.GLOB: binary_range_parser(exp.Glob), 516 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 517 TokenType.IN: lambda self, this: self._parse_in(this), 518 TokenType.IS: lambda self, this: self._parse_is(this), 519 TokenType.LIKE: binary_range_parser(exp.Like), 520 TokenType.ILIKE: binary_range_parser(exp.ILike), 521 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 522 TokenType.RLIKE: 
binary_range_parser(exp.RegexpLike), 523 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 524 } 525 526 PROPERTY_PARSERS = { 527 "AFTER": lambda self: self._parse_afterjournal( 528 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 529 ), 530 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 531 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 532 "BEFORE": lambda self: self._parse_journal( 533 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 534 ), 535 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 536 "CHARACTER SET": lambda self: self._parse_character_set(), 537 "CHECKSUM": lambda self: self._parse_checksum(), 538 "CLUSTER BY": lambda self: self.expression( 539 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 540 ), 541 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 542 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 543 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 544 default=self._prev.text.upper() == "DEFAULT" 545 ), 546 "DEFINER": lambda self: self._parse_definer(), 547 "DETERMINISTIC": lambda self: self.expression( 548 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 549 ), 550 "DISTKEY": lambda self: self._parse_distkey(), 551 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 552 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 553 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 554 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 555 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 556 "FREESPACE": lambda self: self._parse_freespace(), 557 "GLOBAL": lambda self: self._parse_temporary(global_=True), 558 "IMMUTABLE": lambda self: self.expression( 559 
exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 560 ), 561 "JOURNAL": lambda self: self._parse_journal( 562 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 563 ), 564 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 565 "LIKE": lambda self: self._parse_create_like(), 566 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 567 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 568 "LOCK": lambda self: self._parse_locking(), 569 "LOCKING": lambda self: self._parse_locking(), 570 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 571 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 572 "MAX": lambda self: self._parse_datablocksize(), 573 "MAXIMUM": lambda self: self._parse_datablocksize(), 574 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 575 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 576 ), 577 "MIN": lambda self: self._parse_datablocksize(), 578 "MINIMUM": lambda self: self._parse_datablocksize(), 579 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 580 "NO": lambda self: self._parse_noprimaryindex(), 581 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 582 "ON": lambda self: self._parse_oncommit(), 583 "PARTITION BY": lambda self: self._parse_partitioned_by(), 584 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 585 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 586 "RETURNS": lambda self: self._parse_returns(), 587 "ROW": lambda self: self._parse_row(), 588 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 589 "SORTKEY": lambda self: self._parse_sortkey(), 590 "STABLE": lambda self: self.expression( 591 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 592 ), 593 "STORED": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 594 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 595 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 596 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 597 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 598 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 599 "VOLATILE": lambda self: self.expression( 600 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 601 ), 602 "WITH": lambda self: self._parse_with_property(), 603 } 604 605 CONSTRAINT_PARSERS = { 606 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 607 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 608 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 609 "CHARACTER SET": lambda self: self.expression( 610 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 611 ), 612 "CHECK": lambda self: self.expression( 613 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 614 ), 615 "COLLATE": lambda self: self.expression( 616 exp.CollateColumnConstraint, this=self._parse_var() 617 ), 618 "COMMENT": lambda self: self.expression( 619 exp.CommentColumnConstraint, this=self._parse_string() 620 ), 621 "COMPRESS": lambda self: self._parse_compress(), 622 "DEFAULT": lambda self: self.expression( 623 exp.DefaultColumnConstraint, this=self._parse_bitwise() 624 ), 625 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 626 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 627 "FORMAT": lambda self: self.expression( 628 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 629 ), 630 "GENERATED": lambda self: self._parse_generated_as_identity(), 631 "IDENTITY": lambda self: self._parse_auto_increment(), 632 "INLINE": lambda self: self._parse_inline(), 633 "LIKE": lambda 
self: self._parse_create_like(), 634 "NOT": lambda self: self._parse_not_constraint(), 635 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 636 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 637 "PRIMARY KEY": lambda self: self._parse_primary_key(), 638 "TITLE": lambda self: self.expression( 639 exp.TitleColumnConstraint, this=self._parse_var_or_string() 640 ), 641 "UNIQUE": lambda self: self._parse_unique(), 642 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 643 } 644 645 ALTER_PARSERS = { 646 "ADD": lambda self: self._parse_alter_table_add(), 647 "ALTER": lambda self: self._parse_alter_table_alter(), 648 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 649 "DROP": lambda self: self._parse_alter_table_drop(), 650 "RENAME": lambda self: self._parse_alter_table_rename(), 651 } 652 653 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 654 655 NO_PAREN_FUNCTION_PARSERS = { 656 TokenType.CASE: lambda self: self._parse_case(), 657 TokenType.IF: lambda self: self._parse_if(), 658 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 659 } 660 661 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 662 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 663 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 664 "EXTRACT": lambda self: self._parse_extract(), 665 "POSITION": lambda self: self._parse_position(), 666 "STRING_AGG": lambda self: self._parse_string_agg(), 667 "SUBSTRING": lambda self: self._parse_substring(), 668 "TRIM": lambda self: self._parse_trim(), 669 "TRY_CAST": lambda self: self._parse_cast(False), 670 "TRY_CONVERT": lambda self: self._parse_convert(False), 671 } 672 673 QUERY_MODIFIER_PARSERS = { 674 "match": lambda self: self._parse_match_recognize(), 675 "where": lambda self: self._parse_where(), 676 "group": lambda self: self._parse_group(), 
677 "having": lambda self: self._parse_having(), 678 "qualify": lambda self: self._parse_qualify(), 679 "windows": lambda self: self._parse_window_clause(), 680 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 681 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 682 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 683 "order": lambda self: self._parse_order(), 684 "limit": lambda self: self._parse_limit(), 685 "offset": lambda self: self._parse_offset(), 686 "lock": lambda self: self._parse_lock(), 687 "sample": lambda self: self._parse_table_sample(as_modifier=True), 688 } 689 690 SET_PARSERS = { 691 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 692 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 693 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 694 "TRANSACTION": lambda self: self._parse_set_transaction(), 695 } 696 697 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 698 699 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 700 701 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 702 703 TRANSACTION_CHARACTERISTICS = { 704 "ISOLATION LEVEL REPEATABLE READ", 705 "ISOLATION LEVEL READ COMMITTED", 706 "ISOLATION LEVEL READ UNCOMMITTED", 707 "ISOLATION LEVEL SERIALIZABLE", 708 "READ WRITE", 709 "READ ONLY", 710 } 711 712 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 713 714 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 715 716 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 717 718 STRICT_CAST = True 719 720 CONVERT_TYPE_FIRST = False 721 722 __slots__ = ( 723 "error_level", 724 "error_message_context", 725 "sql", 726 "errors", 727 "index_offset", 728 "unnest_column_only", 729 "alias_post_tablesample", 730 "max_errors", 731 "null_ordering", 732 "_tokens", 733 "_index", 734 "_curr", 735 "_next", 736 "_prev", 737 "_prev_comments", 738 
"_show_trie", 739 "_set_trie", 740 ) 741 742 def __init__( 743 self, 744 error_level: t.Optional[ErrorLevel] = None, 745 error_message_context: int = 100, 746 index_offset: int = 0, 747 unnest_column_only: bool = False, 748 alias_post_tablesample: bool = False, 749 max_errors: int = 3, 750 null_ordering: t.Optional[str] = None, 751 ): 752 self.error_level = error_level or ErrorLevel.IMMEDIATE 753 self.error_message_context = error_message_context 754 self.index_offset = index_offset 755 self.unnest_column_only = unnest_column_only 756 self.alias_post_tablesample = alias_post_tablesample 757 self.max_errors = max_errors 758 self.null_ordering = null_ordering 759 self.reset() 760 761 def reset(self): 762 self.sql = "" 763 self.errors = [] 764 self._tokens = [] 765 self._index = 0 766 self._curr = None 767 self._next = None 768 self._prev = None 769 self._prev_comments = None 770 771 def parse( 772 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 773 ) -> t.List[t.Optional[exp.Expression]]: 774 """ 775 Parses a list of tokens and returns a list of syntax trees, one tree 776 per parsed SQL statement. 777 778 Args: 779 raw_tokens: the list of tokens. 780 sql: the original SQL string, used to produce helpful debug messages. 781 782 Returns: 783 The list of syntax trees. 784 """ 785 return self._parse( 786 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 787 ) 788 789 def parse_into( 790 self, 791 expression_types: exp.IntoType, 792 raw_tokens: t.List[Token], 793 sql: t.Optional[str] = None, 794 ) -> t.List[t.Optional[exp.Expression]]: 795 """ 796 Parses a list of tokens into a given Expression type. If a collection of Expression 797 types is given instead, this method will try to parse the token list into each one 798 of them, stopping at the first for which the parsing succeeds. 799 800 Args: 801 expression_types: the expression type(s) to try and parse the token list into. 802 raw_tokens: the list of tokens. 
803 sql: the original SQL string, used to produce helpful debug messages. 804 805 Returns: 806 The target Expression. 807 """ 808 errors = [] 809 for expression_type in ensure_collection(expression_types): 810 parser = self.EXPRESSION_PARSERS.get(expression_type) 811 if not parser: 812 raise TypeError(f"No parser registered for {expression_type}") 813 try: 814 return self._parse(parser, raw_tokens, sql) 815 except ParseError as e: 816 e.errors[0]["into_expression"] = expression_type 817 errors.append(e) 818 raise ParseError( 819 f"Failed to parse into {expression_types}", 820 errors=merge_errors(errors), 821 ) from errors[-1] 822 823 def _parse( 824 self, 825 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 826 raw_tokens: t.List[Token], 827 sql: t.Optional[str] = None, 828 ) -> t.List[t.Optional[exp.Expression]]: 829 self.reset() 830 self.sql = sql or "" 831 total = len(raw_tokens) 832 chunks: t.List[t.List[Token]] = [[]] 833 834 for i, token in enumerate(raw_tokens): 835 if token.token_type == TokenType.SEMICOLON: 836 if i < total - 1: 837 chunks.append([]) 838 else: 839 chunks[-1].append(token) 840 841 expressions = [] 842 843 for tokens in chunks: 844 self._index = -1 845 self._tokens = tokens 846 self._advance() 847 848 expressions.append(parse_method(self)) 849 850 if self._index < len(self._tokens): 851 self.raise_error("Invalid expression / Unexpected token") 852 853 self.check_errors() 854 855 return expressions 856 857 def check_errors(self) -> None: 858 """ 859 Logs or raises any found errors, depending on the chosen error level setting. 
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back through current/previous token so an error is always anchored somewhere.
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m ... \033[0m underlines the offending span in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach any comments captured from the previous token; explicit `comments` wins.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the raw SQL slice spanning the two tokens, inclusive of `end`'s text.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Linear scan of the SQL text to convert (line, col) into an absolute character index.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor and refreshes the _curr/_next/_prev bookkeeping.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) to an absolute token index via _advance's bookkeeping.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback: wrap the remaining text as an opaque Command expression.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <name> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dialect statement parsers first,
        # then raw commands, then a bare expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """Parses a DROP statement; falls back to a generic command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
self._match(TokenType.IF) 1040 and (not not_ or self._match(TokenType.NOT)) 1041 and self._match(TokenType.EXISTS) 1042 ) 1043 1044 def _parse_create(self) -> t.Optional[exp.Expression]: 1045 start = self._prev 1046 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1047 TokenType.OR, TokenType.REPLACE 1048 ) 1049 unique = self._match(TokenType.UNIQUE) 1050 volatile = self._match(TokenType.VOLATILE) 1051 1052 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1053 self._match(TokenType.TABLE) 1054 1055 properties = None 1056 create_token = self._match_set(self.CREATABLES) and self._prev 1057 1058 if not create_token: 1059 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1060 create_token = self._match_set(self.CREATABLES) and self._prev 1061 1062 if not properties or not create_token: 1063 return self._parse_as_command(start) 1064 1065 exists = self._parse_exists(not_=True) 1066 this = None 1067 expression = None 1068 indexes = None 1069 no_schema_binding = None 1070 begin = None 1071 1072 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1073 this = self._parse_user_defined_function(kind=create_token.token_type) 1074 temp_properties = self._parse_properties() 1075 if properties and temp_properties: 1076 properties.expressions.extend(temp_properties.expressions) 1077 elif temp_properties: 1078 properties = temp_properties 1079 1080 self._match(TokenType.ALIAS) 1081 begin = self._match(TokenType.BEGIN) 1082 return_ = self._match_text_seq("RETURN") 1083 expression = self._parse_statement() 1084 1085 if return_: 1086 expression = self.expression(exp.Return, this=expression) 1087 elif create_token.token_type == TokenType.INDEX: 1088 this = self._parse_index() 1089 elif create_token.token_type in self.DB_CREATABLES: 1090 table_parts = self._parse_table_parts(schema=True) 1091 1092 # exp.Properties.Location.POST_NAME 1093 if self._match(TokenType.COMMA): 1094 temp_properties = 
self._parse_properties(before=True) 1095 if properties and temp_properties: 1096 properties.expressions.extend(temp_properties.expressions) 1097 elif temp_properties: 1098 properties = temp_properties 1099 1100 this = self._parse_schema(this=table_parts) 1101 1102 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1103 temp_properties = self._parse_properties() 1104 if properties and temp_properties: 1105 properties.expressions.extend(temp_properties.expressions) 1106 elif temp_properties: 1107 properties = temp_properties 1108 1109 self._match(TokenType.ALIAS) 1110 1111 # exp.Properties.Location.POST_ALIAS 1112 if not ( 1113 self._match(TokenType.SELECT, advance=False) 1114 or self._match(TokenType.WITH, advance=False) 1115 or self._match(TokenType.L_PAREN, advance=False) 1116 ): 1117 temp_properties = self._parse_properties() 1118 if properties and temp_properties: 1119 properties.expressions.extend(temp_properties.expressions) 1120 elif temp_properties: 1121 properties = temp_properties 1122 1123 expression = self._parse_ddl_select() 1124 1125 if create_token.token_type == TokenType.TABLE: 1126 # exp.Properties.Location.POST_EXPRESSION 1127 temp_properties = self._parse_properties() 1128 if properties and temp_properties: 1129 properties.expressions.extend(temp_properties.expressions) 1130 elif temp_properties: 1131 properties = temp_properties 1132 1133 indexes = [] 1134 while True: 1135 index = self._parse_create_table_index() 1136 1137 # exp.Properties.Location.POST_INDEX 1138 if self._match(TokenType.PARTITION_BY, advance=False): 1139 temp_properties = self._parse_properties() 1140 if properties and temp_properties: 1141 properties.expressions.extend(temp_properties.expressions) 1142 elif temp_properties: 1143 properties = temp_properties 1144 1145 if not index: 1146 break 1147 else: 1148 indexes.append(index) 1149 elif create_token.token_type == TokenType.VIEW: 1150 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1151 no_schema_binding = True 

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            volatile=volatile,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a pre-name property (Teradata-style comma-separated CREATE options)."""
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): _curr is assumed non-None here — TODO confirm callers guarantee it.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property clause, returning None when nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment (key may be an identifier or a string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parses `[= | AS] <value>` into the given property expression class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Collects consecutive properties into an exp.Properties node, or None."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A property parser may return a single node or a collection of nodes.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # Teradata: [NO] FALLBACK [PROTECTION]
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses WITH (...) property lists and WITH JOURNAL/DATA/ISOLATED LOADING forms."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        # WITH JOURNAL TABLE = <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        # [NO] LOG
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # [NO | DUAL] [BEFORE] JOURNAL
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # [NO | DUAL] [NOT] [LOCAL] AFTER JOURNAL
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = {ON | OFF | DEFAULT}; `on` stays None when neither ON nor OFF matched.
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # MERGEBLOCKRATIO [= <number> [PERCENT]] — numeric and flag forms are exclusive.
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # [DEFAULT | MIN[IMUM] | MAX[IMUM]] DATABLOCKSIZE [= <size> [units]]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = {ALWAYS | MANUAL | NEVER | DEFAULT} [AUTOTEMP (...)]
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parses a LOCKING clause: kind, target, FOR/IN, lock type, and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # PARTITION BY <expr>, ... — empty list when the clause is absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        # NO PRIMARY INDEX (the leading NO is consumed by the caller)
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        # ON COMMIT PRESERVE ROWS
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        # DISTKEY ( <identifier> )
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parses `LIKE <table> [INCLUDING | EXCLUDING <option>]...` for CREATE TABLE."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        # [COMPOUND] SORTKEY ( <id>, ... )
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        # [DEFAULT] CHARACTER SET [=] <value>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parses RETURNS <type> or RETURNS TABLE [< ... >] for function definitions."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        # DESCRIBE [<kind>] <table>
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parses INSERT [OVERWRITE] [LOCAL] {DIRECTORY <path> | [OR <alt>] INTO <table>} ..."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        # RETURNING <column>, ...
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        # ROW already consumed by the caller; expects FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parses Hive-style ROW FORMAT SERDE/DELIMITED clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Each optional DELIMITED sub-clause contributes one kwarg when present.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parses Hive LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ..."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parses DELETE [FROM] <table> [USING ...] [WHERE ...] [RETURNING ...]."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parses UPDATE <table> SET ... [FROM ...] [WHERE ...] [RETURNING ...]."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        # UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        # PARTITION ( <expr>, ... )
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parses a single VALUES row: a parenthesized tuple or a bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parses a SELECT/VALUES/parenthesized query, optionally preceded by a WITH clause."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parses a WITH [RECURSIVE] clause into an exp.With, or None when absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        # <alias> [ (cols) ] AS ( <statement> )
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parses `[AS] alias [(col, ...)]`, returning None if neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Backtrack if the parenthesis did not contain a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        # Wraps `this` in a Subquery, optionally parsing pivots and an alias.
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attaches laterals, joins, and the QUERY_MODIFIER_PARSERS clauses to `this` in place."""
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Comma join: extend the FROM clause with another table.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        # Oracle-style /*+ hint */ — the opening token is HINT, closed by */.
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        # SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        # FROM <table>, ...
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parses a MATCH_RECOGNIZE ( ... ) clause into an exp.MatchRecognize."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_alias(self._parse_conjunction())
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is kept as raw SQL text: scan tokens while balancing parens.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parses LATERAL [VIEW] and OUTER/CROSS APPLY constructs."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function or identifier follows.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY variants are represented as joins (OUTER APPLY -> LEFT).
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause (side/kind, joined table, ON or USING) into an exp.Join."""
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse `<index_name> ON <table> <columns>` into an exp.Index."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index definition within CREATE TABLE; returns None if no INDEX token follows."""
        unique = self._match(TokenType.UNIQUE)
        # PRIMARY / AMP index modifiers (matched as raw text).
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly-qualified table name (catalog.db.table, with extra dot parts nested) into an exp.Table."""
        catalog = None
        db = None

        table = (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
        )

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift qualifiers left: table -> db -> catalog as more dots appear.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery, or a plain
        (possibly aliased/sampled/pivoted/hinted) table reference."""
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias (alias_post_tablesample).
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (<hint>, ...) table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample =
self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(expr, ...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS x]] into an exp.Unnest."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects where the alias names the produced column, not the table:
            # move the alias name into the column list.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_conjunction()

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
        """Parse a VALUES list, optionally parenthesized as a derived table, into an exp.Values."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)

        if is_derived:
            self._match_r_paren()

        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
        """Parse TABLESAMPLE / USING SAMPLE into an exp.TableSample; returns None if neither is present."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        method = self._parse_var(tokens=(TokenType.ROW,))
2204 self._match(TokenType.L_PAREN) 2205 2206 num = self._parse_number() 2207 2208 if self._match(TokenType.BUCKET): 2209 bucket_numerator = self._parse_number() 2210 self._match(TokenType.OUT_OF) 2211 bucket_denominator = bucket_denominator = self._parse_number() 2212 self._match(TokenType.ON) 2213 bucket_field = self._parse_field() 2214 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2215 percent = num 2216 elif self._match(TokenType.ROWS): 2217 rows = num 2218 else: 2219 size = num 2220 2221 self._match(TokenType.R_PAREN) 2222 2223 if self._match(TokenType.L_PAREN): 2224 method = self._parse_var() 2225 seed = self._match(TokenType.COMMA) and self._parse_number() 2226 self._match_r_paren() 2227 elif self._match_texts(("SEED", "REPEATABLE")): 2228 seed = self._parse_wrapped(self._parse_number) 2229 2230 return self.expression( 2231 exp.TableSample, 2232 method=method, 2233 bucket_numerator=bucket_numerator, 2234 bucket_denominator=bucket_denominator, 2235 bucket_field=bucket_field, 2236 percent=percent, 2237 rows=rows, 2238 size=size, 2239 seed=seed, 2240 kind=kind, 2241 ) 2242 2243 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2244 return list(iter(self._parse_pivot, None)) 2245 2246 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2247 index = self._index 2248 2249 if self._match(TokenType.PIVOT): 2250 unpivot = False 2251 elif self._match(TokenType.UNPIVOT): 2252 unpivot = True 2253 else: 2254 return None 2255 2256 expressions = [] 2257 field = None 2258 2259 if not self._match(TokenType.L_PAREN): 2260 self._retreat(index) 2261 return None 2262 2263 if unpivot: 2264 expressions = self._parse_csv(self._parse_column) 2265 else: 2266 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2267 2268 if not self._match(TokenType.FOR): 2269 self.raise_error("Expecting FOR") 2270 2271 value = self._parse_column() 2272 2273 if not self._match(TokenType.IN): 2274 self.raise_error("Expecting IN") 2275 2276 field = 
self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause into an exp.Where, preserving comments attached to the keyword."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY with any mix of expressions, GROUPING SETS, ROLLUP and CUBE into an exp.Group."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep looping so the clauses can appear in any order / repeat.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE store True; ROLLUP(...) / CUBE(...) store the column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...) into a list of grouping-set expressions, or None."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if
self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause into an exp.Having, or None."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause into an exp.Qualify, or None."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY into an exp.Order wrapping `this`; returns `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Generic parser for ORDER-BY-like clauses (e.g. SORT BY / CLUSTER BY) keyed by token/class."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ordering term with ASC/DESC and NULLS FIRST/LAST, applying the
        dialect's default null ordering when none is given explicitly."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Derive the implicit NULLS placement from self.null_ordering
        # ("nulls_are_small" / "nulls_are_large" / "nulls_are_last" — see class docs).
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True
        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP (when `top` is True) or FETCH FIRST|NEXT ... into exp.Limit / exp.Fetch."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP (n) may be parenthesized.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] (or the comma form `LIMIT x, y`) into an exp.Offset."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses into an exp.Lock."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT [ALL|DISTINCT] chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return
self.expression(
            expression,
            this=this,
            # DISTINCT is the default; ALL makes it non-distinct.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (optionally aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Precedence level: AND/OR-style conjunction operators (self.CONJUNCTION)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Precedence level: equality operators (self.EQUALITY)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Precedence level: comparison operators (self.COMPARISON)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
2472 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2473 if self._match(TokenType.NOTNULL): 2474 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2475 this = self.expression(exp.Not, this=this) 2476 2477 if negate: 2478 this = self.expression(exp.Not, this=this) 2479 2480 if self._match(TokenType.IS): 2481 this = self._parse_is(this) 2482 2483 return this 2484 2485 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2486 negate = self._match(TokenType.NOT) 2487 if self._match(TokenType.DISTINCT_FROM): 2488 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2489 return self.expression(klass, this=this, expression=self._parse_expression()) 2490 2491 this = self.expression( 2492 exp.Is, 2493 this=this, 2494 expression=self._parse_null() or self._parse_boolean(), 2495 ) 2496 return self.expression(exp.Not, this=this) if negate else this 2497 2498 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2499 unnest = self._parse_unnest() 2500 if unnest: 2501 this = self.expression(exp.In, this=this, unnest=unnest) 2502 elif self._match(TokenType.L_PAREN): 2503 expressions = self._parse_csv(self._parse_select_or_expression) 2504 2505 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2506 this = self.expression(exp.In, this=this, query=expressions[0]) 2507 else: 2508 this = self.expression(exp.In, this=this, expressions=expressions) 2509 2510 self._match_r_paren() 2511 else: 2512 this = self.expression(exp.In, this=this, field=self._parse_field()) 2513 2514 return this 2515 2516 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2517 low = self._parse_bitwise() 2518 self._match(TokenType.AND) 2519 high = self._parse_bitwise() 2520 return self.expression(exp.Between, this=this, low=low, high=high) 2521 2522 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2523 if not self._match(TokenType.ESCAPE): 2524 return this 2525 
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Precedence level: bitwise operators plus << and >> (tokenized as LT LT / GT GT pairs)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Precedence level: additive-style operators (self.TERM)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Precedence level: multiplicative-style operators (self.FACTOR)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators (self.UNARY_PARSERS), else a typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL expressions and `TYPE 'literal'` casts; falls back to a column."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' parses as a cast of the literal.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # Bare type name not followed by a literal: re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly parameterized/nested) data type into an exp.DataType, or None."""
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not
self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized type parameters, e.g. DECIMAL(10, 2) or STRUCT(a INT).
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: this wasn't a type after all.
                self._retreat(index)
                return None

            self._match_r_paren()
            # Could still be a function call with the same name, e.g. DATE(...).
            maybe_func = True

        # Postgres-style array suffixes: INT[] / INT[][] ...
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone [ means subscripting, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        # Angle-bracket parameters for nested types, e.g. ARRAY<INT>, STRUCT<a: INT>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        # TIMESTAMP/TIME WITH [LOCAL] TIME ZONE handling.
        if type_token in self.TIMESTAMPS:
            if self._match(TokenType.WITH_TIME_ZONE) or
type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal — treat as a function name, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: either a bare type, or `name [:] type` as an exp.StructKwarg."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.AtTimeZone if an AT TIME ZONE clause follows."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dotted qualification, ::casts, bracket
        subscripts, and other COLUMN_OPERATORS."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other column operators take a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
2740 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2741 this = self._replace_columns_with_dots(this) 2742 2743 if op: 2744 this = op(self, this, field) 2745 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2746 this = self.expression( 2747 exp.Column, 2748 this=field, 2749 table=this.this, 2750 db=this.args.get("table"), 2751 catalog=this.args.get("db"), 2752 ) 2753 else: 2754 this = self.expression(exp.Dot, this=this, expression=field) 2755 this = self._parse_bracket(this) 2756 2757 return this 2758 2759 def _parse_primary(self) -> t.Optional[exp.Expression]: 2760 if self._match_set(self.PRIMARY_PARSERS): 2761 token_type = self._prev.token_type 2762 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2763 2764 if token_type == TokenType.STRING: 2765 expressions = [primary] 2766 while self._match(TokenType.STRING): 2767 expressions.append(exp.Literal.string(self._prev.text)) 2768 if len(expressions) > 1: 2769 return self.expression(exp.Concat, expressions=expressions) 2770 return primary 2771 2772 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2773 return exp.Literal.number(f"0.{self._prev.text}") 2774 2775 if self._match(TokenType.L_PAREN): 2776 comments = self._prev_comments 2777 query = self._parse_select() 2778 2779 if query: 2780 expressions = [query] 2781 else: 2782 expressions = self._parse_csv( 2783 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2784 ) 2785 2786 this = seq_get(expressions, 0) 2787 self._parse_query_modifiers(this) 2788 self._match_r_paren() 2789 2790 if isinstance(this, exp.Subqueryable): 2791 this = self._parse_set_operations( 2792 self._parse_subquery(this=this, parse_alias=False) 2793 ) 2794 elif len(expressions) > 1: 2795 this = self.expression(exp.Tuple, expressions=expressions) 2796 else: 2797 this = self.expression(exp.Paren, this=this) 2798 2799 if this and comments: 2800 this.comments = comments 2801 2802 return this 

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching through NO_PAREN_FUNCTION_PARSERS,
        NO_PAREN_FUNCTIONS, FUNCTION_PARSERS, SUBQUERY_PREDICATES and FUNCTIONS;
        unknown names become exp.Anonymous."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens: only parenless builtins like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition as a column definition."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap a national string literal (N'...') in an exp.National node."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda ((a, b) -> expr) if present; otherwise a DISTINCT list, select
        or expression, with optional IGNORE/RESPECT NULLS and ORDER/LIMIT tails."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda arg list after all.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            # RESPECT NULLS is the default; consume it silently if present.
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into an exp.Schema wrapping `this`."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, FOR ORDINALITY, and trailing constraints."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Plain identifier with no type or constraints: not a column def.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT / IDENTITY-style options, with optional (start, increment)."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            # (start, increment) form.
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # START <n> INCREMENT <n> form.
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with optional sequence options."""
        if self._match(TokenType.BY_DEFAULT):
            # this=False marks BY DEFAULT; this=True marks ALWAYS.
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3030 3031 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3032 if self._match_text_seq("NULL"): 3033 return self.expression(exp.NotNullColumnConstraint) 3034 if self._match_text_seq("CASESPECIFIC"): 3035 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3036 return None 3037 3038 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3039 this = self._parse_references() 3040 if this: 3041 return this 3042 3043 if self._match(TokenType.CONSTRAINT): 3044 this = self._parse_id_var() 3045 3046 if self._match_texts(self.CONSTRAINT_PARSERS): 3047 return self.expression( 3048 exp.ColumnConstraint, 3049 this=this, 3050 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3051 ) 3052 3053 return this 3054 3055 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3056 if not self._match(TokenType.CONSTRAINT): 3057 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3058 3059 this = self._parse_id_var() 3060 expressions = [] 3061 3062 while True: 3063 constraint = self._parse_unnamed_constraint() or self._parse_function() 3064 if not constraint: 3065 break 3066 expressions.append(constraint) 3067 3068 return self.expression(exp.Constraint, this=this, expressions=expressions) 3069 3070 def _parse_unnamed_constraint( 3071 self, constraints: t.Optional[t.Collection[str]] = None 3072 ) -> t.Optional[exp.Expression]: 3073 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3074 return None 3075 3076 constraint = self._prev.text.upper() 3077 if constraint not in self.CONSTRAINT_PARSERS: 3078 self.raise_error(f"No parser found for schema constraint {constraint}.") 3079 3080 return self.CONSTRAINT_PARSERS[constraint](self) 3081 3082 def _parse_unique(self) -> exp.Expression: 3083 if not self._match(TokenType.L_PAREN, advance=False): 3084 return self.expression(exp.UniqueColumnConstraint) 3085 return 
self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 3086 3087 def _parse_key_constraint_options(self) -> t.List[str]: 3088 options = [] 3089 while True: 3090 if not self._curr: 3091 break 3092 3093 if self._match(TokenType.ON): 3094 action = None 3095 on = self._advance_any() and self._prev.text 3096 3097 if self._match(TokenType.NO_ACTION): 3098 action = "NO ACTION" 3099 elif self._match(TokenType.CASCADE): 3100 action = "CASCADE" 3101 elif self._match_pair(TokenType.SET, TokenType.NULL): 3102 action = "SET NULL" 3103 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3104 action = "SET DEFAULT" 3105 else: 3106 self.raise_error("Invalid key constraint") 3107 3108 options.append(f"ON {on} {action}") 3109 elif self._match_text_seq("NOT", "ENFORCED"): 3110 options.append("NOT ENFORCED") 3111 elif self._match_text_seq("DEFERRABLE"): 3112 options.append("DEFERRABLE") 3113 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3114 options.append("INITIALLY DEFERRED") 3115 elif self._match_text_seq("NORELY"): 3116 options.append("NORELY") 3117 elif self._match_text_seq("MATCH", "FULL"): 3118 options.append("MATCH FULL") 3119 else: 3120 break 3121 3122 return options 3123 3124 def _parse_references(self) -> t.Optional[exp.Expression]: 3125 if not self._match(TokenType.REFERENCES): 3126 return None 3127 3128 expressions = None 3129 this = self._parse_id_var() 3130 3131 if self._match(TokenType.L_PAREN, advance=False): 3132 expressions = self._parse_wrapped_id_vars() 3133 3134 options = self._parse_key_constraint_options() 3135 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3136 3137 def _parse_foreign_key(self) -> exp.Expression: 3138 expressions = self._parse_wrapped_id_vars() 3139 reference = self._parse_references() 3140 options = {} 3141 3142 while self._match(TokenType.ON): 3143 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3144 self.raise_error("Expected DELETE or UPDATE") 3145 3146 
            # Continuation of _parse_foreign_key: record the action for the
            # DELETE/UPDATE event just matched.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE/RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY as a column constraint ([ASC|DESC]) or, with a
        following paren, as a table constraint with a column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals,
        recursing to support chained brackets (a[1][2])."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: slice with no start, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn ``this`` into a slice if a colon follows (x[1:2])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        # Optional comparison operand (simple CASE form); None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true, false) or IF cond THEN true [ELSE false] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); ``strict`` selects exp.Cast vs exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT argument lists, including the
        Postgres ORDER BY form and WITHIN GROUP (ORDER BY ...)."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        # Continuation of _parse_string_agg: fall back to a plain GROUP_CONCAT
        # call when no WITHIN GROUP clause follows.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(a, b) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(needle IN haystack) or the comma-separated call form;
        ``haystack_first`` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): first term was the characters to trim.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's comma-separated named windows, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one ``name AS (window spec)`` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE NULLS / OVER (...) suffixes
        around ``this``; with ``alias=True`` parse a named-window definition."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        # Continuation of _parse_window: named-window definitions take AS,
        # everything else requires OVER.
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — a reference to a named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED/CURRENT ROW or an expression,
        followed by an optional PRECEDING/FOLLOWING side marker."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (or parenthesized alias list) after ``this``.

        With ``explicit=True`` an AS keyword is required; otherwise a bare
        identifier also counts as an alias.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token into exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and reinterpret it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/extra tokens if requested) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()
        # NOTE(review): this range starts inside _advance_any's neighborhood in
        # the original file; the first lines below conclude _parse_replace from
        # the preceding range.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a ``sep``-separated list using ``parse_method``; None results
        are dropped, and comments on separators attach to the preceding item."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from ``expressions`` over
        operands produced by ``parse_method``."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized separated list using ``parse_method``."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Parse ``( <parse_method> )``, raising on missing parens."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a plain expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT (plus set operations) used inside DDL statements."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # A mode is a run of VAR tokens (e.g. ISOLATION LEVEL words).
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TO SAVEPOINT name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] column def."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... DROP [COLUMN] action."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse DROP PARTITION clauses into exp.DropPartition."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY / CHECK action.

        Relies on self._prev being the token matched by the caller
        (_parse_alter_table_add).
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... ADD, dispatching to constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] (drop default, set default,
        or change type/collation/using)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP, dispatching to partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            # Continuation of _parse_alter_table_drop.
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse RENAME TO <table> into exp.RenameTable."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE with a known action, else fall back to a raw command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=ensure_list(parser(self)),
            )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None if absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the SHOW_PARSERS trie, else a generic exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one ``name = value`` / ``name TO value`` SET item; backtracks
        and returns None when no assignment operator follows."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the SET_PARSERS trie, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; falls back to a raw command if tokens remain."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of the given (possibly multi-word) options into exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk ``trie`` over upcoming token words to find the longest-match
        parser in ``parsers``; rewinds and returns None on no match."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type: TokenType, advance: bool = True) -> t.Optional[bool]:
        """Return True (consuming the token unless advance=False) if the current
        token has the given type; otherwise None."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> t.Optional[bool]:
        """Like _match, but against a collection of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Match two consecutive token types, consuming both on success."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require '(' or raise; attach any pending comments to ``expression``."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require ')' or raise; attach any pending comments to ``expression``."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Match the current token's (upper-cased) text against ``texts``."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Match a sequence of upper-cased token texts; all-or-nothing (rewinds
        on any miss, and also when advance=False)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Unwrap Column nodes whose names are bound lambda variables."""
        if isinstance(node, exp.Column):
            if node.name in lambda_variables:
                return node.this
        return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- unnest_column_only: If UNNEST aliases are treated as column aliases only. Default: False
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
742 def __init__( 743 self, 744 error_level: t.Optional[ErrorLevel] = None, 745 error_message_context: int = 100, 746 index_offset: int = 0, 747 unnest_column_only: bool = False, 748 alias_post_tablesample: bool = False, 749 max_errors: int = 3, 750 null_ordering: t.Optional[str] = None, 751 ): 752 self.error_level = error_level or ErrorLevel.IMMEDIATE 753 self.error_message_context = error_message_context 754 self.index_offset = index_offset 755 self.unnest_column_only = unnest_column_only 756 self.alias_post_tablesample = alias_post_tablesample 757 self.max_errors = max_errors 758 self.null_ordering = null_ordering 759 self.reset()
771 def parse( 772 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 773 ) -> t.List[t.Optional[exp.Expression]]: 774 """ 775 Parses a list of tokens and returns a list of syntax trees, one tree 776 per parsed SQL statement. 777 778 Args: 779 raw_tokens: the list of tokens. 780 sql: the original SQL string, used to produce helpful debug messages. 781 782 Returns: 783 The list of syntax trees. 784 """ 785 return self._parse( 786 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 787 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
789 def parse_into( 790 self, 791 expression_types: exp.IntoType, 792 raw_tokens: t.List[Token], 793 sql: t.Optional[str] = None, 794 ) -> t.List[t.Optional[exp.Expression]]: 795 """ 796 Parses a list of tokens into a given Expression type. If a collection of Expression 797 types is given instead, this method will try to parse the token list into each one 798 of them, stopping at the first for which the parsing succeeds. 799 800 Args: 801 expression_types: the expression type(s) to try and parse the token list into. 802 raw_tokens: the list of tokens. 803 sql: the original SQL string, used to produce helpful debug messages. 804 805 Returns: 806 The target Expression. 807 """ 808 errors = [] 809 for expression_type in ensure_collection(expression_types): 810 parser = self.EXPRESSION_PARSERS.get(expression_type) 811 if not parser: 812 raise TypeError(f"No parser registered for {expression_type}") 813 try: 814 return self._parse(parser, raw_tokens, sql) 815 except ParseError as e: 816 e.errors[0]["into_expression"] = expression_type 817 errors.append(e) 818 raise ParseError( 819 f"Failed to parse into {expression_types}", 820 errors=merge_errors(errors), 821 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
857 def check_errors(self) -> None: 858 """ 859 Logs or raises any found errors, depending on the chosen error level setting. 860 """ 861 if self.error_level == ErrorLevel.WARN: 862 for error in self.errors: 863 logger.error(str(error)) 864 elif self.error_level == ErrorLevel.RAISE and self.errors: 865 raise ParseError( 866 concat_messages(self.errors, self.max_errors), 867 errors=merge_errors(self.errors), 868 )
Logs or raises any found errors, depending on the chosen error level setting.
870 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 871 """ 872 Appends an error in the list of recorded errors or raises it, depending on the chosen 873 error level setting. 874 """ 875 token = token or self._curr or self._prev or Token.string("") 876 start = self._find_token(token) 877 end = start + len(token.text) 878 start_context = self.sql[max(start - self.error_message_context, 0) : start] 879 highlight = self.sql[start:end] 880 end_context = self.sql[end : end + self.error_message_context] 881 882 error = ParseError.new( 883 f"{message}. Line {token.line}, Col: {token.col}.\n" 884 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 885 description=message, 886 line=token.line, 887 col=token.col, 888 start_context=start_context, 889 highlight=highlight, 890 end_context=end_context, 891 ) 892 893 if self.error_level == ErrorLevel.IMMEDIATE: 894 raise error 895 896 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
898 def expression( 899 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 900 ) -> exp.Expression: 901 """ 902 Creates a new, validated Expression. 903 904 Args: 905 exp_class: the expression class to instantiate. 906 comments: an optional list of comments to attach to the expression. 907 kwargs: the arguments to set for the expression along with their respective values. 908 909 Returns: 910 The target expression. 911 """ 912 instance = exp_class(**kwargs) 913 if self._prev_comments: 914 instance.comments = self._prev_comments 915 self._prev_comments = None 916 if comments: 917 instance.comments = comments 918 self.validate_expression(instance) 919 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
921 def validate_expression( 922 self, expression: exp.Expression, args: t.Optional[t.List] = None 923 ) -> None: 924 """ 925 Validates an already instantiated expression, making sure that all its mandatory arguments 926 are set. 927 928 Args: 929 expression: the expression to validate. 930 args: an optional list of items that was used to instantiate the expression, if it's a Func. 931 """ 932 if self.error_level == ErrorLevel.IGNORE: 933 return 934 935 for error_message in expression.error_messages(args): 936 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.