# sqlglot.parser — consumes a token stream and produces sqlglot expression trees.
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21E = t.TypeVar("E", bound=exp.Expression) 22 23 24def parse_var_map(args: t.Sequence) -> exp.Expression: 25 if len(args) == 1 and args[0].is_star: 26 return exp.StarMap(this=args[0]) 27 28 keys = [] 29 values = [] 30 for i in range(0, len(args), 2): 31 keys.append(args[i]) 32 values.append(args[i + 1]) 33 return exp.VarMap( 34 keys=exp.Array(expressions=keys), 35 values=exp.Array(expressions=values), 36 ) 37 38 39def parse_like(args): 40 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 41 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 42 43 44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 return lambda self, this: self._parse_escape( 48 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 49 ) 50 51 52class _Parser(type): 53 def __new__(cls, clsname, bases, attrs): 54 klass = super().__new__(cls, clsname, bases, attrs) 55 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 56 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 57 58 return klass 59 60 61class Parser(metaclass=_Parser): 62 """ 63 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 64 a parsed syntax tree. 65 66 Args: 67 error_level: the desired error level. 
68 Default: ErrorLevel.RAISE 69 error_message_context: determines the amount of context to capture from a 70 query string when displaying the error message (in number of characters). 71 Default: 50. 72 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 73 Default: 0 74 alias_post_tablesample: If the table alias comes after tablesample. 75 Default: False 76 max_errors: Maximum number of error messages to include in a raised ParseError. 77 This is only relevant if error_level is ErrorLevel.RAISE. 78 Default: 3 79 null_ordering: Indicates the default null ordering method to use if not explicitly set. 80 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 81 Default: "nulls_are_small" 82 """ 83 84 FUNCTIONS: t.Dict[str, t.Callable] = { 85 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 86 "DATE_TO_DATE_STR": lambda args: exp.Cast( 87 this=seq_get(args, 0), 88 to=exp.DataType(this=exp.DataType.Type.TEXT), 89 ), 90 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 91 "IFNULL": exp.Coalesce.from_arg_list, 92 "LIKE": parse_like, 93 "TIME_TO_TIME_STR": lambda args: exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 98 this=exp.Cast( 99 this=seq_get(args, 0), 100 to=exp.DataType(this=exp.DataType.Type.TEXT), 101 ), 102 start=exp.Literal.number(1), 103 length=exp.Literal.number(10), 104 ), 105 "VAR_MAP": parse_var_map, 106 } 107 108 NO_PAREN_FUNCTIONS = { 109 TokenType.CURRENT_DATE: exp.CurrentDate, 110 TokenType.CURRENT_DATETIME: exp.CurrentDate, 111 TokenType.CURRENT_TIME: exp.CurrentTime, 112 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 113 TokenType.CURRENT_USER: exp.CurrentUser, 114 } 115 116 JOIN_HINTS: t.Set[str] = set() 117 118 NESTED_TYPE_TOKENS = { 119 TokenType.ARRAY, 120 TokenType.MAP, 121 TokenType.STRUCT, 122 TokenType.NULLABLE, 123 } 124 125 
TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.SMALLINT, 130 TokenType.INT, 131 TokenType.BIGINT, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATE, 154 TokenType.DECIMAL, 155 TokenType.BIGDECIMAL, 156 TokenType.UUID, 157 TokenType.GEOGRAPHY, 158 TokenType.GEOMETRY, 159 TokenType.HLLSKETCH, 160 TokenType.HSTORE, 161 TokenType.PSEUDO_TYPE, 162 TokenType.SUPER, 163 TokenType.SERIAL, 164 TokenType.SMALLSERIAL, 165 TokenType.BIGSERIAL, 166 TokenType.XML, 167 TokenType.UNIQUEIDENTIFIER, 168 TokenType.MONEY, 169 TokenType.SMALLMONEY, 170 TokenType.ROWVERSION, 171 TokenType.IMAGE, 172 TokenType.VARIANT, 173 TokenType.OBJECT, 174 TokenType.INET, 175 *NESTED_TYPE_TOKENS, 176 } 177 178 SUBQUERY_PREDICATES = { 179 TokenType.ANY: exp.Any, 180 TokenType.ALL: exp.All, 181 TokenType.EXISTS: exp.Exists, 182 TokenType.SOME: exp.Any, 183 } 184 185 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 186 187 DB_CREATABLES = { 188 TokenType.DATABASE, 189 TokenType.SCHEMA, 190 TokenType.TABLE, 191 TokenType.VIEW, 192 } 193 194 CREATABLES = { 195 TokenType.COLUMN, 196 TokenType.FUNCTION, 197 TokenType.INDEX, 198 TokenType.PROCEDURE, 199 *DB_CREATABLES, 200 } 201 202 ID_VAR_TOKENS = { 203 TokenType.VAR, 204 TokenType.ANTI, 205 TokenType.APPLY, 206 TokenType.AUTO_INCREMENT, 207 TokenType.BEGIN, 208 TokenType.BOTH, 209 TokenType.BUCKET, 210 TokenType.CACHE, 211 TokenType.CASCADE, 212 TokenType.COLLATE, 213 TokenType.COMMAND, 214 TokenType.COMMENT, 215 TokenType.COMMIT, 216 
TokenType.COMPOUND, 217 TokenType.CONSTRAINT, 218 TokenType.DEFAULT, 219 TokenType.DELETE, 220 TokenType.DESCRIBE, 221 TokenType.DIV, 222 TokenType.END, 223 TokenType.EXECUTE, 224 TokenType.ESCAPE, 225 TokenType.FALSE, 226 TokenType.FIRST, 227 TokenType.FILTER, 228 TokenType.FOLLOWING, 229 TokenType.FORMAT, 230 TokenType.FULL, 231 TokenType.IF, 232 TokenType.IS, 233 TokenType.ISNULL, 234 TokenType.INTERVAL, 235 TokenType.LAZY, 236 TokenType.LEADING, 237 TokenType.LEFT, 238 TokenType.LOCAL, 239 TokenType.MATERIALIZED, 240 TokenType.MERGE, 241 TokenType.NATURAL, 242 TokenType.NEXT, 243 TokenType.OFFSET, 244 TokenType.ONLY, 245 TokenType.OPTIONS, 246 TokenType.ORDINALITY, 247 TokenType.OVERWRITE, 248 TokenType.PARTITION, 249 TokenType.PERCENT, 250 TokenType.PIVOT, 251 TokenType.PRAGMA, 252 TokenType.PRECEDING, 253 TokenType.RANGE, 254 TokenType.REFERENCES, 255 TokenType.RIGHT, 256 TokenType.ROW, 257 TokenType.ROWS, 258 TokenType.SEED, 259 TokenType.SEMI, 260 TokenType.SET, 261 TokenType.SHOW, 262 TokenType.SORTKEY, 263 TokenType.TEMPORARY, 264 TokenType.TOP, 265 TokenType.TRAILING, 266 TokenType.TRUE, 267 TokenType.UNBOUNDED, 268 TokenType.UNIQUE, 269 TokenType.UNLOGGED, 270 TokenType.UNPIVOT, 271 TokenType.VOLATILE, 272 TokenType.WINDOW, 273 *CREATABLES, 274 *SUBQUERY_PREDICATES, 275 *TYPE_TOKENS, 276 *NO_PAREN_FUNCTIONS, 277 } 278 279 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 280 281 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 282 TokenType.APPLY, 283 TokenType.FULL, 284 TokenType.LEFT, 285 TokenType.NATURAL, 286 TokenType.OFFSET, 287 TokenType.RIGHT, 288 TokenType.WINDOW, 289 } 290 291 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 292 293 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 294 295 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 296 297 FUNC_TOKENS = { 298 TokenType.COMMAND, 299 TokenType.CURRENT_DATE, 300 TokenType.CURRENT_DATETIME, 301 TokenType.CURRENT_TIMESTAMP, 302 TokenType.CURRENT_TIME, 303 
TokenType.CURRENT_USER, 304 TokenType.FILTER, 305 TokenType.FIRST, 306 TokenType.FORMAT, 307 TokenType.GLOB, 308 TokenType.IDENTIFIER, 309 TokenType.INDEX, 310 TokenType.ISNULL, 311 TokenType.ILIKE, 312 TokenType.LIKE, 313 TokenType.MERGE, 314 TokenType.OFFSET, 315 TokenType.PRIMARY_KEY, 316 TokenType.REPLACE, 317 TokenType.ROW, 318 TokenType.UNNEST, 319 TokenType.VAR, 320 TokenType.LEFT, 321 TokenType.RIGHT, 322 TokenType.DATE, 323 TokenType.DATETIME, 324 TokenType.TABLE, 325 TokenType.TIMESTAMP, 326 TokenType.TIMESTAMPTZ, 327 TokenType.WINDOW, 328 *TYPE_TOKENS, 329 *SUBQUERY_PREDICATES, 330 } 331 332 CONJUNCTION = { 333 TokenType.AND: exp.And, 334 TokenType.OR: exp.Or, 335 } 336 337 EQUALITY = { 338 TokenType.EQ: exp.EQ, 339 TokenType.NEQ: exp.NEQ, 340 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 341 } 342 343 COMPARISON = { 344 TokenType.GT: exp.GT, 345 TokenType.GTE: exp.GTE, 346 TokenType.LT: exp.LT, 347 TokenType.LTE: exp.LTE, 348 } 349 350 BITWISE = { 351 TokenType.AMP: exp.BitwiseAnd, 352 TokenType.CARET: exp.BitwiseXor, 353 TokenType.PIPE: exp.BitwiseOr, 354 TokenType.DPIPE: exp.DPipe, 355 } 356 357 TERM = { 358 TokenType.DASH: exp.Sub, 359 TokenType.PLUS: exp.Add, 360 TokenType.MOD: exp.Mod, 361 TokenType.COLLATE: exp.Collate, 362 } 363 364 FACTOR = { 365 TokenType.DIV: exp.IntDiv, 366 TokenType.LR_ARROW: exp.Distance, 367 TokenType.SLASH: exp.Div, 368 TokenType.STAR: exp.Mul, 369 } 370 371 TIMESTAMPS = { 372 TokenType.TIME, 373 TokenType.TIMESTAMP, 374 TokenType.TIMESTAMPTZ, 375 TokenType.TIMESTAMPLTZ, 376 } 377 378 SET_OPERATIONS = { 379 TokenType.UNION, 380 TokenType.INTERSECT, 381 TokenType.EXCEPT, 382 } 383 384 JOIN_SIDES = { 385 TokenType.LEFT, 386 TokenType.RIGHT, 387 TokenType.FULL, 388 } 389 390 JOIN_KINDS = { 391 TokenType.INNER, 392 TokenType.OUTER, 393 TokenType.CROSS, 394 TokenType.SEMI, 395 TokenType.ANTI, 396 } 397 398 LAMBDAS = { 399 TokenType.ARROW: lambda self, expressions: self.expression( 400 exp.Lambda, 401 this=self._replace_lambda( 402 
self._parse_conjunction(), 403 {node.name for node in expressions}, 404 ), 405 expressions=expressions, 406 ), 407 TokenType.FARROW: lambda self, expressions: self.expression( 408 exp.Kwarg, 409 this=exp.Var(this=expressions[0].name), 410 expression=self._parse_conjunction(), 411 ), 412 } 413 414 COLUMN_OPERATORS = { 415 TokenType.DOT: None, 416 TokenType.DCOLON: lambda self, this, to: self.expression( 417 exp.Cast if self.STRICT_CAST else exp.TryCast, 418 this=this, 419 to=to, 420 ), 421 TokenType.ARROW: lambda self, this, path: self.expression( 422 exp.JSONExtract, 423 this=this, 424 expression=path, 425 ), 426 TokenType.DARROW: lambda self, this, path: self.expression( 427 exp.JSONExtractScalar, 428 this=this, 429 expression=path, 430 ), 431 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 432 exp.JSONBExtract, 433 this=this, 434 expression=path, 435 ), 436 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 437 exp.JSONBExtractScalar, 438 this=this, 439 expression=path, 440 ), 441 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 442 exp.JSONBContains, 443 this=this, 444 expression=key, 445 ), 446 } 447 448 EXPRESSION_PARSERS = { 449 exp.Column: lambda self: self._parse_column(), 450 exp.DataType: lambda self: self._parse_types(), 451 exp.From: lambda self: self._parse_from(), 452 exp.Group: lambda self: self._parse_group(), 453 exp.Identifier: lambda self: self._parse_id_var(), 454 exp.Lateral: lambda self: self._parse_lateral(), 455 exp.Join: lambda self: self._parse_join(), 456 exp.Order: lambda self: self._parse_order(), 457 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 458 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 459 exp.Lambda: lambda self: self._parse_lambda(), 460 exp.Limit: lambda self: self._parse_limit(), 461 exp.Offset: lambda self: self._parse_offset(), 462 exp.TableAlias: lambda self: self._parse_table_alias(), 463 exp.Table: lambda self: 
self._parse_table(), 464 exp.Condition: lambda self: self._parse_conjunction(), 465 exp.Expression: lambda self: self._parse_statement(), 466 exp.Properties: lambda self: self._parse_properties(), 467 exp.Where: lambda self: self._parse_where(), 468 exp.Ordered: lambda self: self._parse_ordered(), 469 exp.Having: lambda self: self._parse_having(), 470 exp.With: lambda self: self._parse_with(), 471 exp.Window: lambda self: self._parse_named_window(), 472 exp.Qualify: lambda self: self._parse_qualify(), 473 exp.Returning: lambda self: self._parse_returning(), 474 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 475 } 476 477 STATEMENT_PARSERS = { 478 TokenType.ALTER: lambda self: self._parse_alter(), 479 TokenType.BEGIN: lambda self: self._parse_transaction(), 480 TokenType.CACHE: lambda self: self._parse_cache(), 481 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 482 TokenType.COMMENT: lambda self: self._parse_comment(), 483 TokenType.CREATE: lambda self: self._parse_create(), 484 TokenType.DELETE: lambda self: self._parse_delete(), 485 TokenType.DESC: lambda self: self._parse_describe(), 486 TokenType.DESCRIBE: lambda self: self._parse_describe(), 487 TokenType.DROP: lambda self: self._parse_drop(), 488 TokenType.END: lambda self: self._parse_commit_or_rollback(), 489 TokenType.INSERT: lambda self: self._parse_insert(), 490 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 491 TokenType.MERGE: lambda self: self._parse_merge(), 492 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 493 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 494 TokenType.SET: lambda self: self._parse_set(), 495 TokenType.UNCACHE: lambda self: self._parse_uncache(), 496 TokenType.UPDATE: lambda self: self._parse_update(), 497 TokenType.USE: lambda self: self.expression( 498 exp.Use, 499 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 500 and exp.Var(this=self._prev.text), 501 
this=self._parse_table(schema=False), 502 ), 503 } 504 505 UNARY_PARSERS = { 506 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 507 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 508 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 509 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 510 } 511 512 PRIMARY_PARSERS = { 513 TokenType.STRING: lambda self, token: self.expression( 514 exp.Literal, this=token.text, is_string=True 515 ), 516 TokenType.NUMBER: lambda self, token: self.expression( 517 exp.Literal, this=token.text, is_string=False 518 ), 519 TokenType.STAR: lambda self, _: self.expression( 520 exp.Star, 521 **{"except": self._parse_except(), "replace": self._parse_replace()}, 522 ), 523 TokenType.NULL: lambda self, _: self.expression(exp.Null), 524 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 525 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 526 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 527 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 528 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 529 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 530 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 531 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 532 } 533 534 PLACEHOLDER_PARSERS = { 535 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 536 TokenType.PARAMETER: lambda self: self._parse_parameter(), 537 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 538 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 539 else None, 540 } 541 542 RANGE_PARSERS = { 543 TokenType.BETWEEN: lambda self, this: 
self._parse_between(this), 544 TokenType.GLOB: binary_range_parser(exp.Glob), 545 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 546 TokenType.IN: lambda self, this: self._parse_in(this), 547 TokenType.IS: lambda self, this: self._parse_is(this), 548 TokenType.LIKE: binary_range_parser(exp.Like), 549 TokenType.ILIKE: binary_range_parser(exp.ILike), 550 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 551 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 552 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 553 } 554 555 PROPERTY_PARSERS = { 556 "AFTER": lambda self: self._parse_afterjournal( 557 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 558 ), 559 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 560 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 561 "BEFORE": lambda self: self._parse_journal( 562 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 563 ), 564 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 565 "CHARACTER SET": lambda self: self._parse_character_set(), 566 "CHECKSUM": lambda self: self._parse_checksum(), 567 "CLUSTER BY": lambda self: self.expression( 568 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 569 ), 570 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 571 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 572 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 573 default=self._prev.text.upper() == "DEFAULT" 574 ), 575 "DEFINER": lambda self: self._parse_definer(), 576 "DETERMINISTIC": lambda self: self.expression( 577 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 578 ), 579 "DISTKEY": lambda self: self._parse_distkey(), 580 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 581 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 582 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 583 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 584 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 585 "FREESPACE": lambda self: self._parse_freespace(), 586 "GLOBAL": lambda self: self._parse_temporary(global_=True), 587 "IMMUTABLE": lambda self: self.expression( 588 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 589 ), 590 "JOURNAL": lambda self: self._parse_journal( 591 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 592 ), 593 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 594 "LIKE": lambda self: self._parse_create_like(), 595 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 596 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 597 "LOCK": lambda self: self._parse_locking(), 598 "LOCKING": lambda self: self._parse_locking(), 599 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 600 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 601 "MAX": lambda self: self._parse_datablocksize(), 602 "MAXIMUM": lambda self: self._parse_datablocksize(), 603 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 604 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 605 ), 606 "MIN": lambda self: self._parse_datablocksize(), 607 "MINIMUM": lambda self: self._parse_datablocksize(), 608 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 609 "NO": lambda self: self._parse_noprimaryindex(), 610 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 611 "ON": lambda self: self._parse_oncommit(), 612 "PARTITION BY": lambda self: self._parse_partitioned_by(), 613 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 614 
"PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 615 "RETURNS": lambda self: self._parse_returns(), 616 "ROW": lambda self: self._parse_row(), 617 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 618 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 619 "SORTKEY": lambda self: self._parse_sortkey(), 620 "STABLE": lambda self: self.expression( 621 exp.StabilityProperty, this=exp.Literal.string("STABLE") 622 ), 623 "STORED": lambda self: self._parse_stored(), 624 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 625 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 626 "TEMP": lambda self: self._parse_temporary(global_=False), 627 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 628 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 629 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 630 "VOLATILE": lambda self: self._parse_volatile_property(), 631 "WITH": lambda self: self._parse_with_property(), 632 } 633 634 CONSTRAINT_PARSERS = { 635 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 636 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 637 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 638 "CHARACTER SET": lambda self: self.expression( 639 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 640 ), 641 "CHECK": lambda self: self.expression( 642 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 643 ), 644 "COLLATE": lambda self: self.expression( 645 exp.CollateColumnConstraint, this=self._parse_var() 646 ), 647 "COMMENT": lambda self: self.expression( 648 exp.CommentColumnConstraint, this=self._parse_string() 649 ), 650 "COMPRESS": lambda self: self._parse_compress(), 651 "DEFAULT": lambda self: self.expression( 652 exp.DefaultColumnConstraint, this=self._parse_bitwise() 
653 ), 654 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 655 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 656 "FORMAT": lambda self: self.expression( 657 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "GENERATED": lambda self: self._parse_generated_as_identity(), 660 "IDENTITY": lambda self: self._parse_auto_increment(), 661 "INLINE": lambda self: self._parse_inline(), 662 "LIKE": lambda self: self._parse_create_like(), 663 "NOT": lambda self: self._parse_not_constraint(), 664 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 665 "ON": lambda self: self._match(TokenType.UPDATE) 666 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 667 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 668 "PRIMARY KEY": lambda self: self._parse_primary_key(), 669 "REFERENCES": lambda self: self._parse_references(match=False), 670 "TITLE": lambda self: self.expression( 671 exp.TitleColumnConstraint, this=self._parse_var_or_string() 672 ), 673 "UNIQUE": lambda self: self._parse_unique(), 674 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 675 } 676 677 ALTER_PARSERS = { 678 "ADD": lambda self: self._parse_alter_table_add(), 679 "ALTER": lambda self: self._parse_alter_table_alter(), 680 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 681 "DROP": lambda self: self._parse_alter_table_drop(), 682 "RENAME": lambda self: self._parse_alter_table_rename(), 683 } 684 685 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 686 687 NO_PAREN_FUNCTION_PARSERS = { 688 TokenType.CASE: lambda self: self._parse_case(), 689 TokenType.IF: lambda self: self._parse_if(), 690 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 691 } 692 693 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 694 "CAST": 
lambda self: self._parse_cast(self.STRICT_CAST), 695 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 696 "DECODE": lambda self: self._parse_decode(), 697 "EXTRACT": lambda self: self._parse_extract(), 698 "JSON_OBJECT": lambda self: self._parse_json_object(), 699 "LOG": lambda self: self._parse_logarithm(), 700 "MATCH": lambda self: self._parse_match_against(), 701 "POSITION": lambda self: self._parse_position(), 702 "STRING_AGG": lambda self: self._parse_string_agg(), 703 "SUBSTRING": lambda self: self._parse_substring(), 704 "TRIM": lambda self: self._parse_trim(), 705 "TRY_CAST": lambda self: self._parse_cast(False), 706 "TRY_CONVERT": lambda self: self._parse_convert(False), 707 } 708 709 QUERY_MODIFIER_PARSERS = { 710 "match": lambda self: self._parse_match_recognize(), 711 "where": lambda self: self._parse_where(), 712 "group": lambda self: self._parse_group(), 713 "having": lambda self: self._parse_having(), 714 "qualify": lambda self: self._parse_qualify(), 715 "windows": lambda self: self._parse_window_clause(), 716 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 717 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 718 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 719 "order": lambda self: self._parse_order(), 720 "limit": lambda self: self._parse_limit(), 721 "offset": lambda self: self._parse_offset(), 722 "lock": lambda self: self._parse_lock(), 723 "sample": lambda self: self._parse_table_sample(as_modifier=True), 724 } 725 726 SET_PARSERS = { 727 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 728 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 729 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 730 "TRANSACTION": lambda self: self._parse_set_transaction(), 731 } 732 733 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 734 735 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 736 737 
MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Recognized transaction kinds (e.g. BEGIN DEFERRED TRANSACTION).
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Multi-word SET TRANSACTION characteristics, matched as whole phrases.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords for INSERT OR <alternative> (SQLite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # ROWS is excluded so `window_name ROWS BETWEEN ...` is not eaten as an alias.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect knobs, overridden by subclasses:
    # whether CAST errors at runtime (CAST) or yields NULL (TRY_CAST).
    STRICT_CAST = True

    # Whether CONVERT takes the target type as its first argument.
    CONVERT_TYPE_FIRST = False

    # Pivot-column quoting/prefixing behavior; None means "infer".
    QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None
    PREFIXED_PIVOT_COLUMNS = False

    # Whether LOG(b, x) means log base b of x; whether LOG(x) defaults to LN(x).
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """Store configuration and initialize the parsing cursor via reset().

        NOTE(review): the class docstring says the default error level is
        ErrorLevel.RAISE, but the fallback below is ErrorLevel.IMMEDIATE —
        confirm which is intended.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clear all per-parse state (SQL text, errors, token cursor)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments =
None 814 815 def parse( 816 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 817 ) -> t.List[t.Optional[exp.Expression]]: 818 """ 819 Parses a list of tokens and returns a list of syntax trees, one tree 820 per parsed SQL statement. 821 822 Args: 823 raw_tokens: the list of tokens. 824 sql: the original SQL string, used to produce helpful debug messages. 825 826 Returns: 827 The list of syntax trees. 828 """ 829 return self._parse( 830 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 831 ) 832 833 def parse_into( 834 self, 835 expression_types: exp.IntoType, 836 raw_tokens: t.List[Token], 837 sql: t.Optional[str] = None, 838 ) -> t.List[t.Optional[exp.Expression]]: 839 """ 840 Parses a list of tokens into a given Expression type. If a collection of Expression 841 types is given instead, this method will try to parse the token list into each one 842 of them, stopping at the first for which the parsing succeeds. 843 844 Args: 845 expression_types: the expression type(s) to try and parse the token list into. 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The target Expression. 
851 """ 852 errors = [] 853 for expression_type in ensure_collection(expression_types): 854 parser = self.EXPRESSION_PARSERS.get(expression_type) 855 if not parser: 856 raise TypeError(f"No parser registered for {expression_type}") 857 try: 858 return self._parse(parser, raw_tokens, sql) 859 except ParseError as e: 860 e.errors[0]["into_expression"] = expression_type 861 errors.append(e) 862 raise ParseError( 863 f"Failed to parse into {expression_types}", 864 errors=merge_errors(errors), 865 ) from errors[-1] 866 867 def _parse( 868 self, 869 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 870 raw_tokens: t.List[Token], 871 sql: t.Optional[str] = None, 872 ) -> t.List[t.Optional[exp.Expression]]: 873 self.reset() 874 self.sql = sql or "" 875 total = len(raw_tokens) 876 chunks: t.List[t.List[Token]] = [[]] 877 878 for i, token in enumerate(raw_tokens): 879 if token.token_type == TokenType.SEMICOLON: 880 if i < total - 1: 881 chunks.append([]) 882 else: 883 chunks[-1].append(token) 884 885 expressions = [] 886 887 for tokens in chunks: 888 self._index = -1 889 self._tokens = tokens 890 self._advance() 891 892 expressions.append(parse_method(self)) 893 894 if self._index < len(self._tokens): 895 self.raise_error("Invalid expression / Unexpected token") 896 897 self.check_errors() 898 899 return expressions 900 901 def check_errors(self) -> None: 902 """ 903 Logs or raises any found errors, depending on the chosen error level setting. 904 """ 905 if self.error_level == ErrorLevel.WARN: 906 for error in self.errors: 907 logger.error(str(error)) 908 elif self.error_level == ErrorLevel.RAISE and self.errors: 909 raise ParseError( 910 concat_messages(self.errors, self.max_errors), 911 errors=merge_errors(self.errors), 912 ) 913 914 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 915 """ 916 Appends an error in the list of recorded errors or raises it, depending on the chosen 917 error level setting. 
918 """ 919 token = token or self._curr or self._prev or Token.string("") 920 start = token.start 921 end = token.end 922 start_context = self.sql[max(start - self.error_message_context, 0) : start] 923 highlight = self.sql[start:end] 924 end_context = self.sql[end : end + self.error_message_context] 925 926 error = ParseError.new( 927 f"{message}. Line {token.line}, Col: {token.col}.\n" 928 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 929 description=message, 930 line=token.line, 931 col=token.col, 932 start_context=start_context, 933 highlight=highlight, 934 end_context=end_context, 935 ) 936 937 if self.error_level == ErrorLevel.IMMEDIATE: 938 raise error 939 940 self.errors.append(error) 941 942 def expression( 943 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 944 ) -> E: 945 """ 946 Creates a new, validated Expression. 947 948 Args: 949 exp_class: the expression class to instantiate. 950 comments: an optional list of comments to attach to the expression. 951 kwargs: the arguments to set for the expression along with their respective values. 952 953 Returns: 954 The target expression. 955 """ 956 instance = exp_class(**kwargs) 957 if self._prev_comments: 958 instance.comments = self._prev_comments 959 self._prev_comments = None 960 if comments: 961 instance.comments = comments 962 self.validate_expression(instance) 963 return instance 964 965 def validate_expression( 966 self, expression: exp.Expression, args: t.Optional[t.List] = None 967 ) -> None: 968 """ 969 Validates an already instantiated expression, making sure that all its mandatory arguments 970 are set. 971 972 Args: 973 expression: the expression to validate. 974 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the raw SQL text spanned by the two tokens (inclusive of `end`).
        return self.sql[start.start : end.end]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor `times` tokens forward, refreshing _curr/_next/_prev.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or advance) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the remainder of the statement as an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON <kind> <object> IS '<string>'
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token,
        # falling back to commands, then plain expressions/selects.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: keep it verbatim as a Command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        # CREATE [OR REPLACE] [UNIQUE] <kind> ... — properties may appear at several
        # locations (see the exp.Properties.Location markers below) and are merged
        # into a single Properties node.
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): _curr is assumed non-None here — confirm callers guarantee it.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Try the registered property parsers first, then a few special forms,
        # and finally a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        # STORED AS [INPUTFORMAT '<fmt>' OUTPUTFORMAT '<fmt>' | <format>]
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Optional `=` or alias token, then the property value.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collect consecutive properties; `before` selects the pre-name grammar.
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            for p in ensure_list(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE directly after CREATE [OR REPLACE] [UNIQUE] is a table property;
        # elsewhere it denotes function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # WITH (...) | WITH JOURNAL | WITH [NO] DATA | WITH ... ISOLATED LOADING
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # [DEFAULT | MINIMUM | MAXIMUM] DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        # LOCKING <kind> [<object>] [FOR|IN] <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW locks) carry a table reference.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        # [AND [NO] STATISTICS] suffix of WITH [NO] DATA.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        # LIKE <table> [INCLUDING|EXCLUDING <option>]*
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        # RETURNS <type> | RETURNS TABLE [<...>] for user-defined functions.
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self,
 global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        # INSERT [OVERWRITE] [LOCAL DIRECTORY ... | [OR <alt>] INTO [TABLE] <table>] ...
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        # Postgres ON CONFLICT ... / MySQL ON DUPLICATE KEY ... clauses.
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # Hive ROW FORMAT SERDE '<class>' | ROW FORMAT DELIMITED [clauses...].
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        # Hive LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table> ...
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        # Spark CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        # One row of a VALUES clause, with or without surrounding parentheses.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        # Parses a full SELECT (optionally preceded by a WITH clause), a
        # parenthesized nested select/table, or a VALUES clause.
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            # SELECT AS STRUCT / SELECT AS VALUE (BigQuery).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )
            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray WITH between them is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
1886 return self.expression( 1887 exp.CTE, 1888 this=self._parse_wrapped(self._parse_statement), 1889 alias=alias, 1890 ) 1891 1892 def _parse_table_alias( 1893 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1894 ) -> t.Optional[exp.Expression]: 1895 any_token = self._match(TokenType.ALIAS) 1896 alias = ( 1897 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1898 or self._parse_string_as_identifier() 1899 ) 1900 1901 index = self._index 1902 if self._match(TokenType.L_PAREN): 1903 columns = self._parse_csv(self._parse_function_parameter) 1904 self._match_r_paren() if columns else self._retreat(index) 1905 else: 1906 columns = None 1907 1908 if not alias and not columns: 1909 return None 1910 1911 return self.expression(exp.TableAlias, this=alias, columns=columns) 1912 1913 def _parse_subquery( 1914 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1915 ) -> exp.Expression: 1916 return self.expression( 1917 exp.Subquery, 1918 this=this, 1919 pivots=self._parse_pivots(), 1920 alias=self._parse_table_alias() if parse_alias else None, 1921 ) 1922 1923 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1924 if not isinstance(this, self.MODIFIABLES): 1925 return 1926 1927 table = isinstance(this, exp.Table) 1928 1929 while True: 1930 join = self._parse_join() 1931 if join: 1932 this.append("joins", join) 1933 1934 lateral = None 1935 if not join: 1936 lateral = self._parse_lateral() 1937 if lateral: 1938 this.append("laterals", lateral) 1939 1940 comma = None if table else self._match(TokenType.COMMA) 1941 if comma: 1942 this.args["from"].append("expressions", self._parse_table()) 1943 1944 if not (lateral or join or comma): 1945 break 1946 1947 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1948 expression = parser(self) 1949 1950 if expression: 1951 this.set(key, expression) 1952 1953 def _parse_hint(self) -> t.Optional[exp.Expression]: 1954 if 
self._match(TokenType.HINT): 1955 hints = self._parse_csv(self._parse_function) 1956 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1957 self.raise_error("Expected */ after HINT") 1958 return self.expression(exp.Hint, expressions=hints) 1959 1960 return None 1961 1962 def _parse_into(self) -> t.Optional[exp.Expression]: 1963 if not self._match(TokenType.INTO): 1964 return None 1965 1966 temp = self._match(TokenType.TEMPORARY) 1967 unlogged = self._match(TokenType.UNLOGGED) 1968 self._match(TokenType.TABLE) 1969 1970 return self.expression( 1971 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1972 ) 1973 1974 def _parse_from(self) -> t.Optional[exp.Expression]: 1975 if not self._match(TokenType.FROM): 1976 return None 1977 1978 return self.expression( 1979 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1980 ) 1981 1982 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1983 if not self._match(TokenType.MATCH_RECOGNIZE): 1984 return None 1985 1986 self._match_l_paren() 1987 1988 partition = self._parse_partition_by() 1989 order = self._parse_order() 1990 measures = ( 1991 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 1992 ) 1993 1994 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1995 rows = exp.Var(this="ONE ROW PER MATCH") 1996 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1997 text = "ALL ROWS PER MATCH" 1998 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1999 text += f" SHOW EMPTY MATCHES" 2000 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2001 text += f" OMIT EMPTY MATCHES" 2002 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2003 text += f" WITH UNMATCHED ROWS" 2004 rows = exp.Var(this=text) 2005 else: 2006 rows = None 2007 2008 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2009 text = "AFTER MATCH SKIP" 2010 if self._match_text_seq("PAST", "LAST", "ROW"): 2011 text += f" PAST 
LAST ROW" 2012 elif self._match_text_seq("TO", "NEXT", "ROW"): 2013 text += f" TO NEXT ROW" 2014 elif self._match_text_seq("TO", "FIRST"): 2015 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2016 elif self._match_text_seq("TO", "LAST"): 2017 text += f" TO LAST {self._advance_any().text}" # type: ignore 2018 after = exp.Var(this=text) 2019 else: 2020 after = None 2021 2022 if self._match_text_seq("PATTERN"): 2023 self._match_l_paren() 2024 2025 if not self._curr: 2026 self.raise_error("Expecting )", self._curr) 2027 2028 paren = 1 2029 start = self._curr 2030 2031 while self._curr and paren > 0: 2032 if self._curr.token_type == TokenType.L_PAREN: 2033 paren += 1 2034 if self._curr.token_type == TokenType.R_PAREN: 2035 paren -= 1 2036 end = self._prev 2037 self._advance() 2038 if paren > 0: 2039 self.raise_error("Expecting )", self._curr) 2040 pattern = exp.Var(this=self._find_sql(start, end)) 2041 else: 2042 pattern = None 2043 2044 define = ( 2045 self._parse_csv( 2046 lambda: self.expression( 2047 exp.Alias, 2048 alias=self._parse_id_var(any_token=True), 2049 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2050 ) 2051 ) 2052 if self._match_text_seq("DEFINE") 2053 else None 2054 ) 2055 2056 self._match_r_paren() 2057 2058 return self.expression( 2059 exp.MatchRecognize, 2060 partition_by=partition, 2061 order=order, 2062 measures=measures, 2063 rows=rows, 2064 after=after, 2065 pattern=pattern, 2066 define=define, 2067 alias=self._parse_table_alias(), 2068 ) 2069 2070 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2071 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2072 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2073 2074 if outer_apply or cross_apply: 2075 this = self._parse_select(table=True) 2076 view = None 2077 outer = not cross_apply 2078 elif self._match(TokenType.LATERAL): 2079 this = self._parse_select(table=True) 2080 view = self._match(TokenType.VIEW) 2081 outer = 
    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (NATURAL, side, kind) tokens of a join prefix; each may be None."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause (including APPLY variants) into an exp.Join, or return None."""
        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join prefix - rewind and clear what was matched.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
    def _parse_index(self) -> exp.Expression:
        """Parse ``<index> ON [TABLE] <table> <columns>`` into an exp.Index."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index definition inside CREATE TABLE, or return None."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        # "AMP" qualifier - dialect-specific keyword; presumably Teradata (TODO confirm).
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a possibly qualified table name (catalog.db.table, with extra dots nesting)."""
        catalog = None
        db = None

        table = (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
        )

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                # Shift qualifiers left: table -> db -> catalog.
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery, or a plain table."""
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias, others after
        # (controlled by alias_post_tablesample).
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table expression, or return None."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_column)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In unnest_column_only dialects the "alias" actually names the
            # produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2283 2284 if alias and self.unnest_column_only: 2285 if alias.args.get("columns"): 2286 self.raise_error("Unexpected extra column alias in unnest.") 2287 alias.set("columns", [alias.this]) 2288 alias.set("this", None) 2289 2290 offset = None 2291 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2292 self._match(TokenType.ALIAS) 2293 offset = self._parse_id_var() or exp.Identifier(this="offset") 2294 2295 return self.expression( 2296 exp.Unnest, 2297 expressions=expressions, 2298 ordinality=ordinality, 2299 alias=alias, 2300 offset=offset, 2301 ) 2302 2303 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2304 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2305 if not is_derived and not self._match(TokenType.VALUES): 2306 return None 2307 2308 expressions = self._parse_csv(self._parse_value) 2309 2310 if is_derived: 2311 self._match_r_paren() 2312 2313 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2314 2315 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2316 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2317 as_modifier and self._match_text_seq("USING", "SAMPLE") 2318 ): 2319 return None 2320 2321 bucket_numerator = None 2322 bucket_denominator = None 2323 bucket_field = None 2324 percent = None 2325 rows = None 2326 size = None 2327 seed = None 2328 2329 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2330 method = self._parse_var(tokens=(TokenType.ROW,)) 2331 2332 self._match(TokenType.L_PAREN) 2333 2334 num = self._parse_number() 2335 2336 if self._match(TokenType.BUCKET): 2337 bucket_numerator = self._parse_number() 2338 self._match(TokenType.OUT_OF) 2339 bucket_denominator = bucket_denominator = self._parse_number() 2340 self._match(TokenType.ON) 2341 bucket_field = self._parse_field() 2342 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2343 percent = num 2344 elif 
self._match(TokenType.ROWS): 2345 rows = num 2346 else: 2347 size = num 2348 2349 self._match(TokenType.R_PAREN) 2350 2351 if self._match(TokenType.L_PAREN): 2352 method = self._parse_var() 2353 seed = self._match(TokenType.COMMA) and self._parse_number() 2354 self._match_r_paren() 2355 elif self._match_texts(("SEED", "REPEATABLE")): 2356 seed = self._parse_wrapped(self._parse_number) 2357 2358 return self.expression( 2359 exp.TableSample, 2360 method=method, 2361 bucket_numerator=bucket_numerator, 2362 bucket_denominator=bucket_denominator, 2363 bucket_field=bucket_field, 2364 percent=percent, 2365 rows=rows, 2366 size=size, 2367 seed=seed, 2368 kind=kind, 2369 ) 2370 2371 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2372 return list(iter(self._parse_pivot, None)) 2373 2374 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2375 index = self._index 2376 2377 if self._match(TokenType.PIVOT): 2378 unpivot = False 2379 elif self._match(TokenType.UNPIVOT): 2380 unpivot = True 2381 else: 2382 return None 2383 2384 expressions = [] 2385 field = None 2386 2387 if not self._match(TokenType.L_PAREN): 2388 self._retreat(index) 2389 return None 2390 2391 if unpivot: 2392 expressions = self._parse_csv(self._parse_column) 2393 else: 2394 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2395 2396 if not expressions: 2397 self.raise_error("Failed to parse PIVOT's aggregation list") 2398 2399 if not self._match(TokenType.FOR): 2400 self.raise_error("Expecting FOR") 2401 2402 value = self._parse_column() 2403 2404 if not self._match(TokenType.IN): 2405 self.raise_error("Expecting IN") 2406 2407 field = self._parse_in(value) 2408 2409 self._match_r_paren() 2410 2411 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2412 2413 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2414 pivot.set("alias", self._parse_table_alias()) 2415 2416 if not unpivot: 2417 names = 
    def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]:
        """Default naming of pivot output columns: the aggregation aliases."""
        return [agg.alias for agg in pivot_columns]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause, or return None."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, including GROUPING SETS / ROLLUP / CUBE elements."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Keep consuming group-by elements until an iteration adds nothing.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # "WITH ROLLUP" has no column list; plain ROLLUP takes one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
self.expression(exp.Group, **elements) # type: ignore 2475 2476 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2477 if not self._match(TokenType.GROUPING_SETS): 2478 return None 2479 2480 return self._parse_wrapped_csv(self._parse_grouping_set) 2481 2482 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2483 if self._match(TokenType.L_PAREN): 2484 grouping_set = self._parse_csv(self._parse_column) 2485 self._match_r_paren() 2486 return self.expression(exp.Tuple, expressions=grouping_set) 2487 2488 return self._parse_column() 2489 2490 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2491 if not skip_having_token and not self._match(TokenType.HAVING): 2492 return None 2493 return self.expression(exp.Having, this=self._parse_conjunction()) 2494 2495 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2496 if not self._match(TokenType.QUALIFY): 2497 return None 2498 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2499 2500 def _parse_order( 2501 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2502 ) -> t.Optional[exp.Expression]: 2503 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2504 return this 2505 2506 return self.expression( 2507 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2508 ) 2509 2510 def _parse_sort( 2511 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2512 ) -> t.Optional[exp.Expression]: 2513 if not self._match(token_type): 2514 return None 2515 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2516 2517 def _parse_ordered(self) -> exp.Expression: 2518 this = self._parse_conjunction() 2519 self._match(TokenType.ASC) 2520 is_desc = self._match(TokenType.DESC) 2521 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2522 is_nulls_last = self._match(TokenType.NULLS_LAST) 2523 desc = is_desc or False 2524 asc = not desc 2525 
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH; returns ``this`` unchanged when absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET (or the ``LIMIT x, y`` comma form); returns ``this`` when absent."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses, or return None."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right operand."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # Default is DISTINCT unless ALL was given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS, ISNULL, IS...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2640 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2641 if self._match(TokenType.NOTNULL): 2642 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2643 this = self.expression(exp.Not, this=this) 2644 2645 if negate: 2646 this = self.expression(exp.Not, this=this) 2647 2648 if self._match(TokenType.IS): 2649 this = self._parse_is(this) 2650 2651 return this 2652 2653 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2654 index = self._index - 1 2655 negate = self._match(TokenType.NOT) 2656 if self._match(TokenType.DISTINCT_FROM): 2657 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2658 return self.expression(klass, this=this, expression=self._parse_expression()) 2659 2660 expression = self._parse_null() or self._parse_boolean() 2661 if not expression: 2662 self._retreat(index) 2663 return None 2664 2665 this = self.expression(exp.Is, this=this, expression=expression) 2666 return self.expression(exp.Not, this=this) if negate else this 2667 2668 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2669 unnest = self._parse_unnest() 2670 if unnest: 2671 this = self.expression(exp.In, this=this, unnest=unnest) 2672 elif self._match(TokenType.L_PAREN): 2673 expressions = self._parse_csv(self._parse_select_or_expression) 2674 2675 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2676 this = self.expression(exp.In, this=this, query=expressions[0]) 2677 else: 2678 this = self.expression(exp.In, this=this, expressions=expressions) 2679 2680 self._match_r_paren() 2681 else: 2682 this = self.expression(exp.In, this=this, field=self._parse_field()) 2683 2684 return this 2685 2686 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2687 low = self._parse_bitwise() 2688 self._match(TokenType.AND) 2689 high = self._parse_bitwise() 2690 return self.expression(exp.Between, this=this, low=low, high=high) 2691 2692 def _parse_escape(self, this: 
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal, normalizing toward ``INTERVAL '<n>' <unit>``."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> assembled from token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to type/AT TIME ZONE parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style ``TYPE literal``, or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.args.get("expressions"):
                # Bare type name with no arguments - reparse as a plain column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested types, arrays, timestamps and intervals.

        When ``check_func`` is set, a parenthesized form followed by a string
        is rejected (it is likely a function call, not a type).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            # A parenthesized arg list might actually be a function call.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Trailing [] pairs build up nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this wasn't a type after all.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # A following string means TYPE('...') - treat as a function.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field, e.g. ``name: TYPE`` (or a bare type)."""
        index = self._index
        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            # No "name: type" pair - rewind and parse as a bare type.
            self._retreat(index)
            return self._parse_types()
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in AT TIME ZONE if the token follows."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified, bracketed or cast) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::" cast - the RHS must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers as dots accumulate: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
2932 # bigquery allows function calls like x.y.count(...) 2933 # SAFE.SUBSTR(...) 2934 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2935 this = self._replace_columns_with_dots(this) 2936 2937 if op: 2938 this = op(self, this, field) 2939 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2940 this = self.expression( 2941 exp.Column, 2942 this=field, 2943 table=this.this, 2944 db=this.args.get("table"), 2945 catalog=this.args.get("db"), 2946 ) 2947 else: 2948 this = self.expression(exp.Dot, this=this, expression=field) 2949 this = self._parse_bracket(this) 2950 2951 return this 2952 2953 def _parse_primary(self) -> t.Optional[exp.Expression]: 2954 if self._match_set(self.PRIMARY_PARSERS): 2955 token_type = self._prev.token_type 2956 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2957 2958 if token_type == TokenType.STRING: 2959 expressions = [primary] 2960 while self._match(TokenType.STRING): 2961 expressions.append(exp.Literal.string(self._prev.text)) 2962 if len(expressions) > 1: 2963 return self.expression(exp.Concat, expressions=expressions) 2964 return primary 2965 2966 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2967 return exp.Literal.number(f"0.{self._prev.text}") 2968 2969 if self._match(TokenType.L_PAREN): 2970 comments = self._prev_comments 2971 query = self._parse_select() 2972 2973 if query: 2974 expressions = [query] 2975 else: 2976 expressions = self._parse_csv( 2977 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2978 ) 2979 2980 this = seq_get(expressions, 0) 2981 self._parse_query_modifiers(this) 2982 2983 if isinstance(this, exp.Subqueryable): 2984 this = self._parse_set_operations( 2985 self._parse_subquery(this=this, parse_alias=False) 2986 ) 2987 elif len(expressions) > 1: 2988 this = self.expression(exp.Tuple, expressions=expressions) 2989 else: 2990 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call: no-paren, special-cased, known, subquery predicate or anonymous."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # skip the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3059 if count_params(function) == 2: 3060 params = None 3061 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 3062 params = self._parse_csv(self._parse_lambda) 3063 3064 this = function(args, params) 3065 else: 3066 this = function(args) 3067 3068 self.validate_expression(this, args) 3069 else: 3070 this = self.expression(exp.Anonymous, this=this, expressions=args) 3071 3072 self._match_r_paren(this) 3073 return self._parse_window(this) 3074 3075 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3076 return self._parse_column_def(self._parse_id_var()) 3077 3078 def _parse_user_defined_function( 3079 self, kind: t.Optional[TokenType] = None 3080 ) -> t.Optional[exp.Expression]: 3081 this = self._parse_id_var() 3082 3083 while self._match(TokenType.DOT): 3084 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3085 3086 if not self._match(TokenType.L_PAREN): 3087 return this 3088 3089 expressions = self._parse_csv(self._parse_function_parameter) 3090 self._match_r_paren() 3091 return self.expression( 3092 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3093 ) 3094 3095 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3096 literal = self._parse_primary() 3097 if literal: 3098 return self.expression(exp.Introducer, this=token.text, expression=literal) 3099 3100 return self.expression(exp.Identifier, this=token.text) 3101 3102 def _parse_national(self, token: Token) -> exp.Expression: 3103 return self.expression(exp.National, this=exp.Literal.string(token.text)) 3104 3105 def _parse_session_parameter(self) -> exp.Expression: 3106 kind = None 3107 this = self._parse_id_var() or self._parse_primary() 3108 3109 if this and self._match(TokenType.DOT): 3110 kind = this.name 3111 this = self._parse_var() or self._parse_primary() 3112 3113 return self.expression(exp.SessionParameter, this=this, kind=kind) 3114 3115 def _parse_lambda(self) -> t.Optional[exp.Expression]: 3116 
index = self._index 3117 3118 if self._match(TokenType.L_PAREN): 3119 expressions = self._parse_csv(self._parse_id_var) 3120 3121 if not self._match(TokenType.R_PAREN): 3122 self._retreat(index) 3123 else: 3124 expressions = [self._parse_id_var()] 3125 3126 if self._match_set(self.LAMBDAS): 3127 return self.LAMBDAS[self._prev.token_type](self, expressions) 3128 3129 self._retreat(index) 3130 3131 this: t.Optional[exp.Expression] 3132 3133 if self._match(TokenType.DISTINCT): 3134 this = self.expression( 3135 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3136 ) 3137 else: 3138 this = self._parse_select_or_expression() 3139 3140 if isinstance(this, exp.EQ): 3141 left = this.this 3142 if isinstance(left, exp.Column): 3143 left.replace(exp.Var(this=left.text("this"))) 3144 3145 if self._match(TokenType.IGNORE_NULLS): 3146 this = self.expression(exp.IgnoreNulls, this=this) 3147 else: 3148 self._match(TokenType.RESPECT_NULLS) 3149 3150 return self._parse_limit(self._parse_order(this)) 3151 3152 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3153 index = self._index 3154 3155 try: 3156 if self._parse_select(nested=True): 3157 return this 3158 except Exception: 3159 pass 3160 finally: 3161 self._retreat(index) 3162 3163 if not self._match(TokenType.L_PAREN): 3164 return this 3165 3166 args = self._parse_csv( 3167 lambda: self._parse_constraint() 3168 or self._parse_column_def(self._parse_field(any_token=True)) 3169 ) 3170 self._match_r_paren() 3171 return self.expression(exp.Schema, this=this, expressions=args) 3172 3173 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3174 kind = self._parse_types() 3175 3176 if self._match_text_seq("FOR", "ORDINALITY"): 3177 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3178 3179 constraints = [] 3180 while True: 3181 constraint = self._parse_column_constraint() 3182 if not constraint: 3183 break 3184 
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)]."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an optionally named ([CONSTRAINT name]) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint, named (CONSTRAINT name ...) or unnamed."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` and dispatch to its registered parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a column constraint or with a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was already consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with its REFERENCES clause and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, as a column constraint or with a wrapped column list + options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] / {...} following `this`: subscript, slice, array or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize dialect-specific array index bases to the canonical offset
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the optional `: upper` part of a slice expression."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: IF(cond, t, f) and IF cond THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr); a comma is also accepted as the separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict=False` produces a TryCast node instead."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT, including the WITHIN GROUP (ORDER BY ...) form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
3507 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3508 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3509 if not self._match(TokenType.WITHIN_GROUP): 3510 self._retreat(index) 3511 this = exp.GroupConcat.from_arg_list(args) 3512 self.validate_expression(this, args) 3513 return this 3514 3515 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3516 order = self._parse_order(this=expression) 3517 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3518 3519 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3520 to: t.Optional[exp.Expression] 3521 this = self._parse_bitwise() 3522 3523 if self._match(TokenType.USING): 3524 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3525 elif self._match(TokenType.COMMA): 3526 to = self._parse_bitwise() 3527 else: 3528 to = None 3529 3530 # Swap the argument order if needed to produce the correct AST 3531 if self.CONVERT_TYPE_FIRST: 3532 this, to = to, this 3533 3534 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3535 3536 def _parse_decode(self) -> t.Optional[exp.Expression]: 3537 """ 3538 There are generally two variants of the DECODE function: 3539 3540 - DECODE(bin, charset) 3541 - DECODE(expression, search, result [, search, result] ... [, default]) 3542 3543 The second variant will always be parsed into a CASE expression. Note that NULL 3544 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3545 instead of relying on pattern matching. 
3546 """ 3547 args = self._parse_csv(self._parse_conjunction) 3548 3549 if len(args) < 3: 3550 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3551 3552 expression, *expressions = args 3553 if not expression: 3554 return None 3555 3556 ifs = [] 3557 for search, result in zip(expressions[::2], expressions[1::2]): 3558 if not search or not result: 3559 return None 3560 3561 if isinstance(search, exp.Literal): 3562 ifs.append( 3563 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3564 ) 3565 elif isinstance(search, exp.Null): 3566 ifs.append( 3567 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3568 ) 3569 else: 3570 cond = exp.or_( 3571 exp.EQ(this=expression.copy(), expression=search), 3572 exp.and_( 3573 exp.Is(this=expression.copy(), expression=exp.Null()), 3574 exp.Is(this=search.copy(), expression=exp.Null()), 3575 ), 3576 ) 3577 ifs.append(exp.If(this=cond, true=result)) 3578 3579 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3580 3581 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3582 self._match_text_seq("KEY") 3583 key = self._parse_field() 3584 self._match(TokenType.COLON) 3585 self._match_text_seq("VALUE") 3586 value = self._parse_field() 3587 if not key and not value: 3588 return None 3589 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3590 3591 def _parse_json_object(self) -> exp.Expression: 3592 expressions = self._parse_csv(self._parse_json_key_value) 3593 3594 null_handling = None 3595 if self._match_text_seq("NULL", "ON", "NULL"): 3596 null_handling = "NULL ON NULL" 3597 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3598 null_handling = "ABSENT ON NULL" 3599 3600 unique_keys = None 3601 if self._match_text_seq("WITH", "UNIQUE"): 3602 unique_keys = True 3603 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3604 unique_keys = False 3605 3606 self._match_text_seq("KEYS") 3607 
3608 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3609 format_json = self._match_text_seq("FORMAT", "JSON") 3610 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3611 3612 return self.expression( 3613 exp.JSONObject, 3614 expressions=expressions, 3615 null_handling=null_handling, 3616 unique_keys=unique_keys, 3617 return_type=return_type, 3618 format_json=format_json, 3619 encoding=encoding, 3620 ) 3621 3622 def _parse_logarithm(self) -> exp.Expression: 3623 # Default argument order is base, expression 3624 args = self._parse_csv(self._parse_range) 3625 3626 if len(args) > 1: 3627 if not self.LOG_BASE_FIRST: 3628 args.reverse() 3629 return exp.Log.from_arg_list(args) 3630 3631 return self.expression( 3632 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3633 ) 3634 3635 def _parse_match_against(self) -> exp.Expression: 3636 expressions = self._parse_csv(self._parse_column) 3637 3638 self._match_text_seq(")", "AGAINST", "(") 3639 3640 this = self._parse_string() 3641 3642 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3643 modifier = "IN NATURAL LANGUAGE MODE" 3644 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3645 modifier = f"{modifier} WITH QUERY EXPANSION" 3646 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3647 modifier = "IN BOOLEAN MODE" 3648 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3649 modifier = "WITH QUERY EXPANSION" 3650 else: 3651 modifier = None 3652 3653 return self.expression( 3654 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3655 ) 3656 3657 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3658 args = self._parse_csv(self._parse_bitwise) 3659 3660 if self._match(TokenType.IN): 3661 return self.expression( 3662 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3663 ) 3664 3665 if haystack_first: 3666 haystack = seq_get(args, 0) 3667 needle = seq_get(args, 1) 3668 else: 
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse the table list of a join hint, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_term()
        else:
            # No FROM/comma: the first term was the trim target, not the characters to strip
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a comma-separated list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes of `this`: FILTER, WITHIN GROUP, nulls handling, OVER (...)."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window)
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: {UNBOUNDED | CURRENT ROW | expr} [PRECEDING | FOLLOWING]."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias for `this`; `explicit=True` requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token, optionally with prefix tokens."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and reinterpret it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword-like token into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a variable or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. ? or :name); rewinds if its parser declines."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * EXCEPT (...) column list (parentheses optional)."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * REPLACE (...) expression list (parentheses optional)."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments found on the separator to the preceding item
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators mapped in `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a matched pair of parentheses."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse either a full SELECT or a plain (possibly set-op) expression."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK token itself was consumed by the caller
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... DROP [COLUMN] via _parse_drop, defaulting the kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
4063 drop.set("kind", drop.args.get("kind", "COLUMN")) 4064 return drop 4065 4066 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4067 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4068 return self.expression( 4069 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4070 ) 4071 4072 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4073 this = None 4074 kind = self._prev.token_type 4075 4076 if kind == TokenType.CONSTRAINT: 4077 this = self._parse_id_var() 4078 4079 if self._match_text_seq("CHECK"): 4080 expression = self._parse_wrapped(self._parse_conjunction) 4081 enforced = self._match_text_seq("ENFORCED") 4082 4083 return self.expression( 4084 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4085 ) 4086 4087 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4088 expression = self._parse_foreign_key() 4089 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4090 expression = self._parse_primary_key() 4091 else: 4092 expression = None 4093 4094 return self.expression(exp.AddConstraint, this=this, expression=expression) 4095 4096 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4097 index = self._index - 1 4098 4099 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4100 return self._parse_csv(self._parse_add_constraint) 4101 4102 self._retreat(index) 4103 return self._parse_csv(self._parse_add_column) 4104 4105 def _parse_alter_table_alter(self) -> exp.Expression: 4106 self._match(TokenType.COLUMN) 4107 column = self._parse_field(any_token=True) 4108 4109 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4110 return self.expression(exp.AlterColumn, this=column, drop=True) 4111 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4112 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4113 4114 self._match_text_seq("SET", 
"DATA") 4115 return self.expression( 4116 exp.AlterColumn, 4117 this=column, 4118 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4119 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4120 using=self._match(TokenType.USING) and self._parse_conjunction(), 4121 ) 4122 4123 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4124 index = self._index - 1 4125 4126 partition_exists = self._parse_exists() 4127 if self._match(TokenType.PARTITION, advance=False): 4128 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4129 4130 self._retreat(index) 4131 return self._parse_csv(self._parse_drop_column) 4132 4133 def _parse_alter_table_rename(self) -> exp.Expression: 4134 self._match_text_seq("TO") 4135 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4136 4137 def _parse_alter(self) -> t.Optional[exp.Expression]: 4138 start = self._prev 4139 4140 if not self._match(TokenType.TABLE): 4141 return self._parse_as_command(start) 4142 4143 exists = self._parse_exists() 4144 this = self._parse_table(schema=True) 4145 4146 if self._next: 4147 self._advance() 4148 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4149 4150 if parser: 4151 actions = ensure_list(parser(self)) 4152 4153 if not self._curr: 4154 return self.expression( 4155 exp.AlterTable, 4156 this=this, 4157 exists=exists, 4158 actions=actions, 4159 ) 4160 return self._parse_as_command(start) 4161 4162 def _parse_merge(self) -> exp.Expression: 4163 self._match(TokenType.INTO) 4164 target = self._parse_table() 4165 4166 self._match(TokenType.USING) 4167 using = self._parse_table() 4168 4169 self._match(TokenType.ON) 4170 on = self._parse_conjunction() 4171 4172 whens = [] 4173 while self._match(TokenType.WHEN): 4174 matched = not self._match(TokenType.NOT) 4175 self._match_text_seq("MATCHED") 4176 source = ( 4177 False 4178 if self._match_text_seq("BY", "TARGET") 4179 else 
self._match_text_seq("BY", "SOURCE") 4180 ) 4181 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4182 4183 self._match(TokenType.THEN) 4184 4185 if self._match(TokenType.INSERT): 4186 _this = self._parse_star() 4187 if _this: 4188 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4189 else: 4190 then = self.expression( 4191 exp.Insert, 4192 this=self._parse_value(), 4193 expression=self._match(TokenType.VALUES) and self._parse_value(), 4194 ) 4195 elif self._match(TokenType.UPDATE): 4196 expressions = self._parse_star() 4197 if expressions: 4198 then = self.expression(exp.Update, expressions=expressions) 4199 else: 4200 then = self.expression( 4201 exp.Update, 4202 expressions=self._match(TokenType.SET) 4203 and self._parse_csv(self._parse_equality), 4204 ) 4205 elif self._match(TokenType.DELETE): 4206 then = self.expression(exp.Var, this=self._prev.text) 4207 else: 4208 then = None 4209 4210 whens.append( 4211 self.expression( 4212 exp.When, 4213 matched=matched, 4214 source=source, 4215 condition=condition, 4216 then=then, 4217 ) 4218 ) 4219 4220 return self.expression( 4221 exp.Merge, 4222 this=target, 4223 using=using, 4224 on=on, 4225 expressions=whens, 4226 ) 4227 4228 def _parse_show(self) -> t.Optional[exp.Expression]: 4229 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4230 if parser: 4231 return parser(self) 4232 self._advance() 4233 return self.expression(exp.Show, this=self._prev.text.upper()) 4234 4235 def _parse_set_item_assignment( 4236 self, kind: t.Optional[str] = None 4237 ) -> t.Optional[exp.Expression]: 4238 index = self._index 4239 4240 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4241 return self._parse_set_transaction(global_=kind == "GLOBAL") 4242 4243 left = self._parse_primary() or self._parse_id_var() 4244 4245 if not self._match_texts(("=", "TO")): 4246 self._retreat(index) 4247 return None 4248 4249 right = 
self._parse_statement() or self._parse_id_var() 4250 this = self.expression( 4251 exp.EQ, 4252 this=left, 4253 expression=right, 4254 ) 4255 4256 return self.expression( 4257 exp.SetItem, 4258 this=this, 4259 kind=kind, 4260 ) 4261 4262 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4263 self._match_text_seq("TRANSACTION") 4264 characteristics = self._parse_csv( 4265 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4266 ) 4267 return self.expression( 4268 exp.SetItem, 4269 expressions=characteristics, 4270 kind="TRANSACTION", 4271 **{"global": global_}, # type: ignore 4272 ) 4273 4274 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4275 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4276 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4277 4278 def _parse_set(self) -> exp.Expression: 4279 index = self._index 4280 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4281 4282 if self._curr: 4283 self._retreat(index) 4284 return self._parse_as_command(self._prev) 4285 4286 return set_ 4287 4288 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4289 for option in options: 4290 if self._match_text_seq(*option.split(" ")): 4291 return exp.Var(this=option) 4292 return None 4293 4294 def _parse_as_command(self, start: Token) -> exp.Command: 4295 while self._curr: 4296 self._advance() 4297 text = self._find_sql(start, self._prev) 4298 size = len(start.text) 4299 return exp.Command(this=text[:size], expression=text[size:]) 4300 4301 def _find_parser( 4302 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4303 ) -> t.Optional[t.Callable]: 4304 if not self._curr: 4305 return None 4306 4307 index = self._index 4308 this = [] 4309 while True: 4310 # The current token might be multiple words 4311 curr = self._curr.text.upper() 4312 key = curr.split(" ") 4313 this.append(curr) 4314 
self._advance() 4315 result, trie = in_trie(trie, key) 4316 if result == 0: 4317 break 4318 if result == 2: 4319 subparser = parsers[" ".join(this)] 4320 return subparser 4321 self._retreat(index) 4322 return None 4323 4324 def _match(self, token_type, advance=True): 4325 if not self._curr: 4326 return None 4327 4328 if self._curr.token_type == token_type: 4329 if advance: 4330 self._advance() 4331 return True 4332 4333 return None 4334 4335 def _match_set(self, types, advance=True): 4336 if not self._curr: 4337 return None 4338 4339 if self._curr.token_type in types: 4340 if advance: 4341 self._advance() 4342 return True 4343 4344 return None 4345 4346 def _match_pair(self, token_type_a, token_type_b, advance=True): 4347 if not self._curr or not self._next: 4348 return None 4349 4350 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4351 if advance: 4352 self._advance(2) 4353 return True 4354 4355 return None 4356 4357 def _match_l_paren(self, expression=None): 4358 if not self._match(TokenType.L_PAREN): 4359 self.raise_error("Expecting (") 4360 if expression and self._prev_comments: 4361 expression.comments = self._prev_comments 4362 4363 def _match_r_paren(self, expression=None): 4364 if not self._match(TokenType.R_PAREN): 4365 self.raise_error("Expecting )") 4366 if expression and self._prev_comments: 4367 expression.comments = self._prev_comments 4368 4369 def _match_texts(self, texts, advance=True): 4370 if self._curr and self._curr.text.upper() in texts: 4371 if advance: 4372 self._advance() 4373 return True 4374 return False 4375 4376 def _match_text_seq(self, *texts, advance=True): 4377 index = self._index 4378 for text in texts: 4379 if self._curr and self._curr.text.upper() == text: 4380 self._advance() 4381 else: 4382 self._retreat(index) 4383 return False 4384 4385 if not advance: 4386 self._retreat(index) 4387 4388 return True 4389 4390 def _replace_columns_with_dots(self, this): 4391 if isinstance(this, exp.Dot): 
4392 exp.replace_children(this, self._replace_columns_with_dots) 4393 elif isinstance(this, exp.Column): 4394 exp.replace_children(this, self._replace_columns_with_dots) 4395 table = this.args.get("table") 4396 this = ( 4397 self.expression(exp.Dot, this=table, expression=this.this) 4398 if table 4399 else self.expression(exp.Var, this=this.name) 4400 ) 4401 elif isinstance(this, exp.Identifier): 4402 this = self.expression(exp.Var, this=this.name) 4403 return this 4404 4405 def _replace_lambda(self, node, lambda_variables): 4406 for column in node.find_all(exp.Column): 4407 if column.parts[0].name in lambda_variables: 4408 dot_or_id = column.to_dot() if column.table else column.this 4409 parent = column.parent 4410 4411 while isinstance(parent, exp.Dot): 4412 if not isinstance(parent.parent, exp.Dot): 4413 parent.replace(dot_or_id) 4414 break 4415 parent = parent.parent 4416 else: 4417 if column is node: 4418 node = dot_or_id 4419 else: 4420 column.replace(dot_or_id) 4421 return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VAR_MAP expression from a flat [k1, v1, k2, v2, ...] argument list.

    A single star argument produces a StarMap instead; otherwise the even-indexed
    arguments become the keys array and the odd-indexed ones the values array.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Even positions are keys, odd positions their paired values.
    keys = [args[index] for index in range(0, len(args), 2)]
    values = [args[index + 1] for index in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
62class Parser(metaclass=_Parser): 63 """ 64 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 65 a parsed syntax tree. 66 67 Args: 68 error_level: the desired error level. 69 Default: ErrorLevel.RAISE 70 error_message_context: determines the amount of context to capture from a 71 query string when displaying the error message (in number of characters). 72 Default: 50. 73 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 74 Default: 0 75 alias_post_tablesample: If the table alias comes after tablesample. 76 Default: False 77 max_errors: Maximum number of error messages to include in a raised ParseError. 78 This is only relevant if error_level is ErrorLevel.RAISE. 79 Default: 3 80 null_ordering: Indicates the default null ordering method to use if not explicitly set. 81 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 82 Default: "nulls_are_small" 83 """ 84 85 FUNCTIONS: t.Dict[str, t.Callable] = { 86 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 87 "DATE_TO_DATE_STR": lambda args: exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 92 "IFNULL": exp.Coalesce.from_arg_list, 93 "LIKE": parse_like, 94 "TIME_TO_TIME_STR": lambda args: exp.Cast( 95 this=seq_get(args, 0), 96 to=exp.DataType(this=exp.DataType.Type.TEXT), 97 ), 98 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 99 this=exp.Cast( 100 this=seq_get(args, 0), 101 to=exp.DataType(this=exp.DataType.Type.TEXT), 102 ), 103 start=exp.Literal.number(1), 104 length=exp.Literal.number(10), 105 ), 106 "VAR_MAP": parse_var_map, 107 } 108 109 NO_PAREN_FUNCTIONS = { 110 TokenType.CURRENT_DATE: exp.CurrentDate, 111 TokenType.CURRENT_DATETIME: exp.CurrentDate, 112 TokenType.CURRENT_TIME: exp.CurrentTime, 113 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 114 
TokenType.CURRENT_USER: exp.CurrentUser, 115 } 116 117 JOIN_HINTS: t.Set[str] = set() 118 119 NESTED_TYPE_TOKENS = { 120 TokenType.ARRAY, 121 TokenType.MAP, 122 TokenType.STRUCT, 123 TokenType.NULLABLE, 124 } 125 126 TYPE_TOKENS = { 127 TokenType.BIT, 128 TokenType.BOOLEAN, 129 TokenType.TINYINT, 130 TokenType.SMALLINT, 131 TokenType.INT, 132 TokenType.BIGINT, 133 TokenType.FLOAT, 134 TokenType.DOUBLE, 135 TokenType.CHAR, 136 TokenType.NCHAR, 137 TokenType.VARCHAR, 138 TokenType.NVARCHAR, 139 TokenType.TEXT, 140 TokenType.MEDIUMTEXT, 141 TokenType.LONGTEXT, 142 TokenType.MEDIUMBLOB, 143 TokenType.LONGBLOB, 144 TokenType.BINARY, 145 TokenType.VARBINARY, 146 TokenType.JSON, 147 TokenType.JSONB, 148 TokenType.INTERVAL, 149 TokenType.TIME, 150 TokenType.TIMESTAMP, 151 TokenType.TIMESTAMPTZ, 152 TokenType.TIMESTAMPLTZ, 153 TokenType.DATETIME, 154 TokenType.DATE, 155 TokenType.DECIMAL, 156 TokenType.BIGDECIMAL, 157 TokenType.UUID, 158 TokenType.GEOGRAPHY, 159 TokenType.GEOMETRY, 160 TokenType.HLLSKETCH, 161 TokenType.HSTORE, 162 TokenType.PSEUDO_TYPE, 163 TokenType.SUPER, 164 TokenType.SERIAL, 165 TokenType.SMALLSERIAL, 166 TokenType.BIGSERIAL, 167 TokenType.XML, 168 TokenType.UNIQUEIDENTIFIER, 169 TokenType.MONEY, 170 TokenType.SMALLMONEY, 171 TokenType.ROWVERSION, 172 TokenType.IMAGE, 173 TokenType.VARIANT, 174 TokenType.OBJECT, 175 TokenType.INET, 176 *NESTED_TYPE_TOKENS, 177 } 178 179 SUBQUERY_PREDICATES = { 180 TokenType.ANY: exp.Any, 181 TokenType.ALL: exp.All, 182 TokenType.EXISTS: exp.Exists, 183 TokenType.SOME: exp.Any, 184 } 185 186 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 187 188 DB_CREATABLES = { 189 TokenType.DATABASE, 190 TokenType.SCHEMA, 191 TokenType.TABLE, 192 TokenType.VIEW, 193 } 194 195 CREATABLES = { 196 TokenType.COLUMN, 197 TokenType.FUNCTION, 198 TokenType.INDEX, 199 TokenType.PROCEDURE, 200 *DB_CREATABLES, 201 } 202 203 ID_VAR_TOKENS = { 204 TokenType.VAR, 205 TokenType.ANTI, 206 TokenType.APPLY, 207 
TokenType.AUTO_INCREMENT, 208 TokenType.BEGIN, 209 TokenType.BOTH, 210 TokenType.BUCKET, 211 TokenType.CACHE, 212 TokenType.CASCADE, 213 TokenType.COLLATE, 214 TokenType.COMMAND, 215 TokenType.COMMENT, 216 TokenType.COMMIT, 217 TokenType.COMPOUND, 218 TokenType.CONSTRAINT, 219 TokenType.DEFAULT, 220 TokenType.DELETE, 221 TokenType.DESCRIBE, 222 TokenType.DIV, 223 TokenType.END, 224 TokenType.EXECUTE, 225 TokenType.ESCAPE, 226 TokenType.FALSE, 227 TokenType.FIRST, 228 TokenType.FILTER, 229 TokenType.FOLLOWING, 230 TokenType.FORMAT, 231 TokenType.FULL, 232 TokenType.IF, 233 TokenType.IS, 234 TokenType.ISNULL, 235 TokenType.INTERVAL, 236 TokenType.LAZY, 237 TokenType.LEADING, 238 TokenType.LEFT, 239 TokenType.LOCAL, 240 TokenType.MATERIALIZED, 241 TokenType.MERGE, 242 TokenType.NATURAL, 243 TokenType.NEXT, 244 TokenType.OFFSET, 245 TokenType.ONLY, 246 TokenType.OPTIONS, 247 TokenType.ORDINALITY, 248 TokenType.OVERWRITE, 249 TokenType.PARTITION, 250 TokenType.PERCENT, 251 TokenType.PIVOT, 252 TokenType.PRAGMA, 253 TokenType.PRECEDING, 254 TokenType.RANGE, 255 TokenType.REFERENCES, 256 TokenType.RIGHT, 257 TokenType.ROW, 258 TokenType.ROWS, 259 TokenType.SEED, 260 TokenType.SEMI, 261 TokenType.SET, 262 TokenType.SHOW, 263 TokenType.SORTKEY, 264 TokenType.TEMPORARY, 265 TokenType.TOP, 266 TokenType.TRAILING, 267 TokenType.TRUE, 268 TokenType.UNBOUNDED, 269 TokenType.UNIQUE, 270 TokenType.UNLOGGED, 271 TokenType.UNPIVOT, 272 TokenType.VOLATILE, 273 TokenType.WINDOW, 274 *CREATABLES, 275 *SUBQUERY_PREDICATES, 276 *TYPE_TOKENS, 277 *NO_PAREN_FUNCTIONS, 278 } 279 280 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 281 282 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 283 TokenType.APPLY, 284 TokenType.FULL, 285 TokenType.LEFT, 286 TokenType.NATURAL, 287 TokenType.OFFSET, 288 TokenType.RIGHT, 289 TokenType.WINDOW, 290 } 291 292 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 293 294 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 295 296 TRIM_TYPES = 
{TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 297 298 FUNC_TOKENS = { 299 TokenType.COMMAND, 300 TokenType.CURRENT_DATE, 301 TokenType.CURRENT_DATETIME, 302 TokenType.CURRENT_TIMESTAMP, 303 TokenType.CURRENT_TIME, 304 TokenType.CURRENT_USER, 305 TokenType.FILTER, 306 TokenType.FIRST, 307 TokenType.FORMAT, 308 TokenType.GLOB, 309 TokenType.IDENTIFIER, 310 TokenType.INDEX, 311 TokenType.ISNULL, 312 TokenType.ILIKE, 313 TokenType.LIKE, 314 TokenType.MERGE, 315 TokenType.OFFSET, 316 TokenType.PRIMARY_KEY, 317 TokenType.REPLACE, 318 TokenType.ROW, 319 TokenType.UNNEST, 320 TokenType.VAR, 321 TokenType.LEFT, 322 TokenType.RIGHT, 323 TokenType.DATE, 324 TokenType.DATETIME, 325 TokenType.TABLE, 326 TokenType.TIMESTAMP, 327 TokenType.TIMESTAMPTZ, 328 TokenType.WINDOW, 329 *TYPE_TOKENS, 330 *SUBQUERY_PREDICATES, 331 } 332 333 CONJUNCTION = { 334 TokenType.AND: exp.And, 335 TokenType.OR: exp.Or, 336 } 337 338 EQUALITY = { 339 TokenType.EQ: exp.EQ, 340 TokenType.NEQ: exp.NEQ, 341 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 342 } 343 344 COMPARISON = { 345 TokenType.GT: exp.GT, 346 TokenType.GTE: exp.GTE, 347 TokenType.LT: exp.LT, 348 TokenType.LTE: exp.LTE, 349 } 350 351 BITWISE = { 352 TokenType.AMP: exp.BitwiseAnd, 353 TokenType.CARET: exp.BitwiseXor, 354 TokenType.PIPE: exp.BitwiseOr, 355 TokenType.DPIPE: exp.DPipe, 356 } 357 358 TERM = { 359 TokenType.DASH: exp.Sub, 360 TokenType.PLUS: exp.Add, 361 TokenType.MOD: exp.Mod, 362 TokenType.COLLATE: exp.Collate, 363 } 364 365 FACTOR = { 366 TokenType.DIV: exp.IntDiv, 367 TokenType.LR_ARROW: exp.Distance, 368 TokenType.SLASH: exp.Div, 369 TokenType.STAR: exp.Mul, 370 } 371 372 TIMESTAMPS = { 373 TokenType.TIME, 374 TokenType.TIMESTAMP, 375 TokenType.TIMESTAMPTZ, 376 TokenType.TIMESTAMPLTZ, 377 } 378 379 SET_OPERATIONS = { 380 TokenType.UNION, 381 TokenType.INTERSECT, 382 TokenType.EXCEPT, 383 } 384 385 JOIN_SIDES = { 386 TokenType.LEFT, 387 TokenType.RIGHT, 388 TokenType.FULL, 389 } 390 391 JOIN_KINDS = { 392 
TokenType.INNER, 393 TokenType.OUTER, 394 TokenType.CROSS, 395 TokenType.SEMI, 396 TokenType.ANTI, 397 } 398 399 LAMBDAS = { 400 TokenType.ARROW: lambda self, expressions: self.expression( 401 exp.Lambda, 402 this=self._replace_lambda( 403 self._parse_conjunction(), 404 {node.name for node in expressions}, 405 ), 406 expressions=expressions, 407 ), 408 TokenType.FARROW: lambda self, expressions: self.expression( 409 exp.Kwarg, 410 this=exp.Var(this=expressions[0].name), 411 expression=self._parse_conjunction(), 412 ), 413 } 414 415 COLUMN_OPERATORS = { 416 TokenType.DOT: None, 417 TokenType.DCOLON: lambda self, this, to: self.expression( 418 exp.Cast if self.STRICT_CAST else exp.TryCast, 419 this=this, 420 to=to, 421 ), 422 TokenType.ARROW: lambda self, this, path: self.expression( 423 exp.JSONExtract, 424 this=this, 425 expression=path, 426 ), 427 TokenType.DARROW: lambda self, this, path: self.expression( 428 exp.JSONExtractScalar, 429 this=this, 430 expression=path, 431 ), 432 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 433 exp.JSONBExtract, 434 this=this, 435 expression=path, 436 ), 437 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 438 exp.JSONBExtractScalar, 439 this=this, 440 expression=path, 441 ), 442 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 443 exp.JSONBContains, 444 this=this, 445 expression=key, 446 ), 447 } 448 449 EXPRESSION_PARSERS = { 450 exp.Column: lambda self: self._parse_column(), 451 exp.DataType: lambda self: self._parse_types(), 452 exp.From: lambda self: self._parse_from(), 453 exp.Group: lambda self: self._parse_group(), 454 exp.Identifier: lambda self: self._parse_id_var(), 455 exp.Lateral: lambda self: self._parse_lateral(), 456 exp.Join: lambda self: self._parse_join(), 457 exp.Order: lambda self: self._parse_order(), 458 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 459 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 460 
exp.Lambda: lambda self: self._parse_lambda(), 461 exp.Limit: lambda self: self._parse_limit(), 462 exp.Offset: lambda self: self._parse_offset(), 463 exp.TableAlias: lambda self: self._parse_table_alias(), 464 exp.Table: lambda self: self._parse_table(), 465 exp.Condition: lambda self: self._parse_conjunction(), 466 exp.Expression: lambda self: self._parse_statement(), 467 exp.Properties: lambda self: self._parse_properties(), 468 exp.Where: lambda self: self._parse_where(), 469 exp.Ordered: lambda self: self._parse_ordered(), 470 exp.Having: lambda self: self._parse_having(), 471 exp.With: lambda self: self._parse_with(), 472 exp.Window: lambda self: self._parse_named_window(), 473 exp.Qualify: lambda self: self._parse_qualify(), 474 exp.Returning: lambda self: self._parse_returning(), 475 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 476 } 477 478 STATEMENT_PARSERS = { 479 TokenType.ALTER: lambda self: self._parse_alter(), 480 TokenType.BEGIN: lambda self: self._parse_transaction(), 481 TokenType.CACHE: lambda self: self._parse_cache(), 482 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 483 TokenType.COMMENT: lambda self: self._parse_comment(), 484 TokenType.CREATE: lambda self: self._parse_create(), 485 TokenType.DELETE: lambda self: self._parse_delete(), 486 TokenType.DESC: lambda self: self._parse_describe(), 487 TokenType.DESCRIBE: lambda self: self._parse_describe(), 488 TokenType.DROP: lambda self: self._parse_drop(), 489 TokenType.END: lambda self: self._parse_commit_or_rollback(), 490 TokenType.INSERT: lambda self: self._parse_insert(), 491 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 492 TokenType.MERGE: lambda self: self._parse_merge(), 493 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 494 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 495 TokenType.SET: lambda self: self._parse_set(), 496 TokenType.UNCACHE: lambda self: self._parse_uncache(), 
497 TokenType.UPDATE: lambda self: self._parse_update(), 498 TokenType.USE: lambda self: self.expression( 499 exp.Use, 500 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 501 and exp.Var(this=self._prev.text), 502 this=self._parse_table(schema=False), 503 ), 504 } 505 506 UNARY_PARSERS = { 507 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 508 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 509 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 510 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 511 } 512 513 PRIMARY_PARSERS = { 514 TokenType.STRING: lambda self, token: self.expression( 515 exp.Literal, this=token.text, is_string=True 516 ), 517 TokenType.NUMBER: lambda self, token: self.expression( 518 exp.Literal, this=token.text, is_string=False 519 ), 520 TokenType.STAR: lambda self, _: self.expression( 521 exp.Star, 522 **{"except": self._parse_except(), "replace": self._parse_replace()}, 523 ), 524 TokenType.NULL: lambda self, _: self.expression(exp.Null), 525 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 526 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 527 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 528 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 529 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 530 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 531 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 532 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 533 } 534 535 PLACEHOLDER_PARSERS = { 536 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 537 TokenType.PARAMETER: lambda self: self._parse_parameter(), 538 
TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 539 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 540 else None, 541 } 542 543 RANGE_PARSERS = { 544 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 545 TokenType.GLOB: binary_range_parser(exp.Glob), 546 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 547 TokenType.IN: lambda self, this: self._parse_in(this), 548 TokenType.IS: lambda self, this: self._parse_is(this), 549 TokenType.LIKE: binary_range_parser(exp.Like), 550 TokenType.ILIKE: binary_range_parser(exp.ILike), 551 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 552 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 553 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 554 } 555 556 PROPERTY_PARSERS = { 557 "AFTER": lambda self: self._parse_afterjournal( 558 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 559 ), 560 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 561 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 562 "BEFORE": lambda self: self._parse_journal( 563 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 564 ), 565 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 566 "CHARACTER SET": lambda self: self._parse_character_set(), 567 "CHECKSUM": lambda self: self._parse_checksum(), 568 "CLUSTER BY": lambda self: self.expression( 569 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 570 ), 571 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 572 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 573 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 574 default=self._prev.text.upper() == "DEFAULT" 575 ), 576 "DEFINER": lambda self: self._parse_definer(), 577 "DETERMINISTIC": lambda self: self.expression( 578 exp.StabilityProperty, 
            this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Dispatch table for column-level constraints; keys are the (upper-cased)
    # keyword(s) that introduce the constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Dispatch table for ALTER TABLE <action> sub-clauses.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are parsed without a parenthesized arg list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose argument lists need bespoke parsing (keyword args,
    # non-standard separators, etc.) rather than a plain CSV of expressions.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for the optional clauses that can trail a query; keys match the
    # arg names used on query expressions.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scopes / variants.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hooks; empty in the base parser.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords allowed after INSERT OR (e.g. sqlite).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect knobs, overridden by subclasses.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None
    PREFIXED_PIVOT_COLUMNS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        # Clear all per-parse state so the same Parser instance can be reused.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the attempted target type, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # All candidate types failed: surface every collected error at once.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Split the token stream on semicolons and run `parse_method` once per chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not create an empty final chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                # parse_method returned before consuming the whole statement.
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
919 """ 920 token = token or self._curr or self._prev or Token.string("") 921 start = token.start 922 end = token.end 923 start_context = self.sql[max(start - self.error_message_context, 0) : start] 924 highlight = self.sql[start:end] 925 end_context = self.sql[end : end + self.error_message_context] 926 927 error = ParseError.new( 928 f"{message}. Line {token.line}, Col: {token.col}.\n" 929 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 930 description=message, 931 line=token.line, 932 col=token.col, 933 start_context=start_context, 934 highlight=highlight, 935 end_context=end_context, 936 ) 937 938 if self.error_level == ErrorLevel.IMMEDIATE: 939 raise error 940 941 self.errors.append(error) 942 943 def expression( 944 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 945 ) -> E: 946 """ 947 Creates a new, validated Expression. 948 949 Args: 950 exp_class: the expression class to instantiate. 951 comments: an optional list of comments to attach to the expression. 952 kwargs: the arguments to set for the expression along with their respective values. 953 954 Returns: 955 The target expression. 956 """ 957 instance = exp_class(**kwargs) 958 if self._prev_comments: 959 instance.comments = self._prev_comments 960 self._prev_comments = None 961 if comments: 962 instance.comments = comments 963 self.validate_expression(instance) 964 return instance 965 966 def validate_expression( 967 self, expression: exp.Expression, args: t.Optional[t.List] = None 968 ) -> None: 969 """ 970 Validates an already instantiated expression, making sure that all its mandatory arguments 971 are set. 972 973 Args: 974 expression: the expression to validate. 975 args: an optional list of items that was used to instantiate the expression, if it's a Func. 
976 """ 977 if self.error_level == ErrorLevel.IGNORE: 978 return 979 980 for error_message in expression.error_messages(args): 981 self.raise_error(error_message) 982 983 def _find_sql(self, start: Token, end: Token) -> str: 984 return self.sql[start.start : end.end] 985 986 def _advance(self, times: int = 1) -> None: 987 self._index += times 988 self._curr = seq_get(self._tokens, self._index) 989 self._next = seq_get(self._tokens, self._index + 1) 990 if self._index > 0: 991 self._prev = self._tokens[self._index - 1] 992 self._prev_comments = self._prev.comments 993 else: 994 self._prev = None 995 self._prev_comments = None 996 997 def _retreat(self, index: int) -> None: 998 if index != self._index: 999 self._advance(index - self._index) 1000 1001 def _parse_command(self) -> exp.Command: 1002 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1003 1004 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1005 start = self._prev 1006 exists = self._parse_exists() if allow_exists else None 1007 1008 self._match(TokenType.ON) 1009 1010 kind = self._match_set(self.CREATABLES) and self._prev 1011 1012 if not kind: 1013 return self._parse_as_command(start) 1014 1015 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1016 this = self._parse_user_defined_function(kind=kind.token_type) 1017 elif kind.token_type == TokenType.TABLE: 1018 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1019 elif kind.token_type == TokenType.COLUMN: 1020 this = self._parse_column() 1021 else: 1022 this = self._parse_id_var() 1023 1024 self._match(TokenType.IS) 1025 1026 return self.expression( 1027 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1028 ) 1029 1030 def _parse_statement(self) -> t.Optional[exp.Expression]: 1031 if self._curr is None: 1032 return None 1033 1034 if self._match_set(self.STATEMENT_PARSERS): 1035 return 
self.STATEMENT_PARSERS[self._prev.token_type](self) 1036 1037 if self._match_set(Tokenizer.COMMANDS): 1038 return self._parse_command() 1039 1040 expression = self._parse_expression() 1041 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1042 1043 self._parse_query_modifiers(expression) 1044 return expression 1045 1046 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1047 start = self._prev 1048 temporary = self._match(TokenType.TEMPORARY) 1049 materialized = self._match(TokenType.MATERIALIZED) 1050 kind = self._match_set(self.CREATABLES) and self._prev.text 1051 if not kind: 1052 return self._parse_as_command(start) 1053 1054 return self.expression( 1055 exp.Drop, 1056 exists=self._parse_exists(), 1057 this=self._parse_table(schema=True), 1058 kind=kind, 1059 temporary=temporary, 1060 materialized=materialized, 1061 cascade=self._match(TokenType.CASCADE), 1062 constraints=self._match_text_seq("CONSTRAINTS"), 1063 purge=self._match_text_seq("PURGE"), 1064 ) 1065 1066 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1067 return ( 1068 self._match(TokenType.IF) 1069 and (not not_ or self._match(TokenType.NOT)) 1070 and self._match(TokenType.EXISTS) 1071 ) 1072 1073 def _parse_create(self) -> t.Optional[exp.Expression]: 1074 start = self._prev 1075 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1076 TokenType.OR, TokenType.REPLACE 1077 ) 1078 unique = self._match(TokenType.UNIQUE) 1079 1080 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1081 self._match(TokenType.TABLE) 1082 1083 properties = None 1084 create_token = self._match_set(self.CREATABLES) and self._prev 1085 1086 if not create_token: 1087 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1088 create_token = self._match_set(self.CREATABLES) and self._prev 1089 1090 if not properties or not create_token: 1091 return self._parse_as_command(start) 1092 1093 exists = 
self._parse_exists(not_=True) 1094 this = None 1095 expression = None 1096 indexes = None 1097 no_schema_binding = None 1098 begin = None 1099 1100 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1101 this = self._parse_user_defined_function(kind=create_token.token_type) 1102 temp_properties = self._parse_properties() 1103 if properties and temp_properties: 1104 properties.expressions.extend(temp_properties.expressions) 1105 elif temp_properties: 1106 properties = temp_properties 1107 1108 self._match(TokenType.ALIAS) 1109 begin = self._match(TokenType.BEGIN) 1110 return_ = self._match_text_seq("RETURN") 1111 expression = self._parse_statement() 1112 1113 if return_: 1114 expression = self.expression(exp.Return, this=expression) 1115 elif create_token.token_type == TokenType.INDEX: 1116 this = self._parse_index() 1117 elif create_token.token_type in self.DB_CREATABLES: 1118 table_parts = self._parse_table_parts(schema=True) 1119 1120 # exp.Properties.Location.POST_NAME 1121 if self._match(TokenType.COMMA): 1122 temp_properties = self._parse_properties(before=True) 1123 if properties and temp_properties: 1124 properties.expressions.extend(temp_properties.expressions) 1125 elif temp_properties: 1126 properties = temp_properties 1127 1128 this = self._parse_schema(this=table_parts) 1129 1130 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1131 temp_properties = self._parse_properties() 1132 if properties and temp_properties: 1133 properties.expressions.extend(temp_properties.expressions) 1134 elif temp_properties: 1135 properties = temp_properties 1136 1137 self._match(TokenType.ALIAS) 1138 1139 # exp.Properties.Location.POST_ALIAS 1140 if not ( 1141 self._match(TokenType.SELECT, advance=False) 1142 or self._match(TokenType.WITH, advance=False) 1143 or self._match(TokenType.L_PAREN, advance=False) 1144 ): 1145 temp_properties = self._parse_properties() 1146 if properties and temp_properties: 1147 
properties.expressions.extend(temp_properties.expressions) 1148 elif temp_properties: 1149 properties = temp_properties 1150 1151 expression = self._parse_ddl_select() 1152 1153 if create_token.token_type == TokenType.TABLE: 1154 # exp.Properties.Location.POST_EXPRESSION 1155 temp_properties = self._parse_properties() 1156 if properties and temp_properties: 1157 properties.expressions.extend(temp_properties.expressions) 1158 elif temp_properties: 1159 properties = temp_properties 1160 1161 indexes = [] 1162 while True: 1163 index = self._parse_create_table_index() 1164 1165 # exp.Properties.Location.POST_INDEX 1166 if self._match(TokenType.PARTITION_BY, advance=False): 1167 temp_properties = self._parse_properties() 1168 if properties and temp_properties: 1169 properties.expressions.extend(temp_properties.expressions) 1170 elif temp_properties: 1171 properties = temp_properties 1172 1173 if not index: 1174 break 1175 else: 1176 indexes.append(index) 1177 elif create_token.token_type == TokenType.VIEW: 1178 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1179 no_schema_binding = True 1180 1181 return self.expression( 1182 exp.Create, 1183 this=this, 1184 kind=create_token.text, 1185 replace=replace, 1186 unique=unique, 1187 expression=expression, 1188 exists=exists, 1189 properties=properties, 1190 indexes=indexes, 1191 no_schema_binding=no_schema_binding, 1192 begin=begin, 1193 ) 1194 1195 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1196 self._match(TokenType.COMMA) 1197 1198 # parsers look to _prev for no/dual/default, so need to consume first 1199 self._match_text_seq("NO") 1200 self._match_text_seq("DUAL") 1201 self._match_text_seq("DEFAULT") 1202 1203 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1204 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1205 1206 return None 1207 1208 def _parse_property(self) -> t.Optional[exp.Expression]: 1209 if self._match_texts(self.PROPERTY_PARSERS): 1210 return 
self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1211 1212 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1213 return self._parse_character_set(default=True) 1214 1215 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1216 return self._parse_sortkey(compound=True) 1217 1218 if self._match_text_seq("SQL", "SECURITY"): 1219 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1220 1221 assignment = self._match_pair( 1222 TokenType.VAR, TokenType.EQ, advance=False 1223 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1224 1225 if assignment: 1226 key = self._parse_var_or_string() 1227 self._match(TokenType.EQ) 1228 return self.expression(exp.Property, this=key, value=self._parse_column()) 1229 1230 return None 1231 1232 def _parse_stored(self) -> exp.Expression: 1233 self._match(TokenType.ALIAS) 1234 1235 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1236 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1237 1238 return self.expression( 1239 exp.FileFormatProperty, 1240 this=self.expression( 1241 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1242 ) 1243 if input_format or output_format 1244 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1245 ) 1246 1247 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1248 self._match(TokenType.EQ) 1249 self._match(TokenType.ALIAS) 1250 return self.expression( 1251 exp_class, 1252 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1253 ) 1254 1255 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1256 properties = [] 1257 1258 while True: 1259 if before: 1260 identified_property = self._parse_property_before() 1261 else: 1262 identified_property = self._parse_property() 1263 1264 if not identified_property: 1265 
break 1266 for p in ensure_list(identified_property): 1267 properties.append(p) 1268 1269 if properties: 1270 return self.expression(exp.Properties, expressions=properties) 1271 1272 return None 1273 1274 def _parse_fallback(self, no=False) -> exp.Expression: 1275 self._match_text_seq("FALLBACK") 1276 return self.expression( 1277 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1278 ) 1279 1280 def _parse_volatile_property(self) -> exp.Expression: 1281 if self._index >= 2: 1282 pre_volatile_token = self._tokens[self._index - 2] 1283 else: 1284 pre_volatile_token = None 1285 1286 if pre_volatile_token and pre_volatile_token.token_type in ( 1287 TokenType.CREATE, 1288 TokenType.REPLACE, 1289 TokenType.UNIQUE, 1290 ): 1291 return exp.VolatileProperty() 1292 1293 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1294 1295 def _parse_with_property( 1296 self, 1297 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1298 self._match(TokenType.WITH) 1299 if self._match(TokenType.L_PAREN, advance=False): 1300 return self._parse_wrapped_csv(self._parse_property) 1301 1302 if self._match_text_seq("JOURNAL"): 1303 return self._parse_withjournaltable() 1304 1305 if self._match_text_seq("DATA"): 1306 return self._parse_withdata(no=False) 1307 elif self._match_text_seq("NO", "DATA"): 1308 return self._parse_withdata(no=True) 1309 1310 if not self._next: 1311 return None 1312 1313 return self._parse_withisolatedloading() 1314 1315 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1316 def _parse_definer(self) -> t.Optional[exp.Expression]: 1317 self._match(TokenType.EQ) 1318 1319 user = self._parse_id_var() 1320 self._match(TokenType.PARAMETER) 1321 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1322 1323 if not user or not host: 1324 return None 1325 1326 return exp.DefinerProperty(this=f"{user}@{host}") 1327 1328 def _parse_withjournaltable(self) -> 
exp.Expression: 1329 self._match(TokenType.TABLE) 1330 self._match(TokenType.EQ) 1331 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1332 1333 def _parse_log(self, no=False) -> exp.Expression: 1334 self._match_text_seq("LOG") 1335 return self.expression(exp.LogProperty, no=no) 1336 1337 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1338 before = self._match_text_seq("BEFORE") 1339 self._match_text_seq("JOURNAL") 1340 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1341 1342 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1343 self._match_text_seq("NOT") 1344 self._match_text_seq("LOCAL") 1345 self._match_text_seq("AFTER", "JOURNAL") 1346 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1347 1348 def _parse_checksum(self) -> exp.Expression: 1349 self._match_text_seq("CHECKSUM") 1350 self._match(TokenType.EQ) 1351 1352 on = None 1353 if self._match(TokenType.ON): 1354 on = True 1355 elif self._match_text_seq("OFF"): 1356 on = False 1357 default = self._match(TokenType.DEFAULT) 1358 1359 return self.expression( 1360 exp.ChecksumProperty, 1361 on=on, 1362 default=default, 1363 ) 1364 1365 def _parse_freespace(self) -> exp.Expression: 1366 self._match_text_seq("FREESPACE") 1367 self._match(TokenType.EQ) 1368 return self.expression( 1369 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1370 ) 1371 1372 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1373 self._match_text_seq("MERGEBLOCKRATIO") 1374 if self._match(TokenType.EQ): 1375 return self.expression( 1376 exp.MergeBlockRatioProperty, 1377 this=self._parse_number(), 1378 percent=self._match(TokenType.PERCENT), 1379 ) 1380 else: 1381 return self.expression( 1382 exp.MergeBlockRatioProperty, 1383 no=no, 1384 default=default, 1385 ) 1386 1387 def _parse_datablocksize(self, default=None) -> 
exp.Expression: 1388 if default: 1389 self._match_text_seq("DATABLOCKSIZE") 1390 return self.expression(exp.DataBlocksizeProperty, default=True) 1391 elif self._match_texts(("MIN", "MINIMUM")): 1392 self._match_text_seq("DATABLOCKSIZE") 1393 return self.expression(exp.DataBlocksizeProperty, min=True) 1394 elif self._match_texts(("MAX", "MAXIMUM")): 1395 self._match_text_seq("DATABLOCKSIZE") 1396 return self.expression(exp.DataBlocksizeProperty, min=False) 1397 1398 self._match_text_seq("DATABLOCKSIZE") 1399 self._match(TokenType.EQ) 1400 size = self._parse_number() 1401 units = None 1402 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1403 units = self._prev.text 1404 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1405 1406 def _parse_blockcompression(self) -> exp.Expression: 1407 self._match_text_seq("BLOCKCOMPRESSION") 1408 self._match(TokenType.EQ) 1409 always = self._match_text_seq("ALWAYS") 1410 manual = self._match_text_seq("MANUAL") 1411 never = self._match_text_seq("NEVER") 1412 default = self._match_text_seq("DEFAULT") 1413 autotemp = None 1414 if self._match_text_seq("AUTOTEMP"): 1415 autotemp = self._parse_schema() 1416 1417 return self.expression( 1418 exp.BlockCompressionProperty, 1419 always=always, 1420 manual=manual, 1421 never=never, 1422 default=default, 1423 autotemp=autotemp, 1424 ) 1425 1426 def _parse_withisolatedloading(self) -> exp.Expression: 1427 no = self._match_text_seq("NO") 1428 concurrent = self._match_text_seq("CONCURRENT") 1429 self._match_text_seq("ISOLATED", "LOADING") 1430 for_all = self._match_text_seq("FOR", "ALL") 1431 for_insert = self._match_text_seq("FOR", "INSERT") 1432 for_none = self._match_text_seq("FOR", "NONE") 1433 return self.expression( 1434 exp.IsolatedLoadingProperty, 1435 no=no, 1436 concurrent=concurrent, 1437 for_all=for_all, 1438 for_insert=for_insert, 1439 for_none=for_none, 1440 ) 1441 1442 def _parse_locking(self) -> exp.Expression: 1443 if self._match(TokenType.TABLE): 
1444 kind = "TABLE" 1445 elif self._match(TokenType.VIEW): 1446 kind = "VIEW" 1447 elif self._match(TokenType.ROW): 1448 kind = "ROW" 1449 elif self._match_text_seq("DATABASE"): 1450 kind = "DATABASE" 1451 else: 1452 kind = None 1453 1454 if kind in ("DATABASE", "TABLE", "VIEW"): 1455 this = self._parse_table_parts() 1456 else: 1457 this = None 1458 1459 if self._match(TokenType.FOR): 1460 for_or_in = "FOR" 1461 elif self._match(TokenType.IN): 1462 for_or_in = "IN" 1463 else: 1464 for_or_in = None 1465 1466 if self._match_text_seq("ACCESS"): 1467 lock_type = "ACCESS" 1468 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1469 lock_type = "EXCLUSIVE" 1470 elif self._match_text_seq("SHARE"): 1471 lock_type = "SHARE" 1472 elif self._match_text_seq("READ"): 1473 lock_type = "READ" 1474 elif self._match_text_seq("WRITE"): 1475 lock_type = "WRITE" 1476 elif self._match_text_seq("CHECKSUM"): 1477 lock_type = "CHECKSUM" 1478 else: 1479 lock_type = None 1480 1481 override = self._match_text_seq("OVERRIDE") 1482 1483 return self.expression( 1484 exp.LockingProperty, 1485 this=this, 1486 kind=kind, 1487 for_or_in=for_or_in, 1488 lock_type=lock_type, 1489 override=override, 1490 ) 1491 1492 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1493 if self._match(TokenType.PARTITION_BY): 1494 return self._parse_csv(self._parse_conjunction) 1495 return [] 1496 1497 def _parse_partitioned_by(self) -> exp.Expression: 1498 self._match(TokenType.EQ) 1499 return self.expression( 1500 exp.PartitionedByProperty, 1501 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1502 ) 1503 1504 def _parse_withdata(self, no=False) -> exp.Expression: 1505 if self._match_text_seq("AND", "STATISTICS"): 1506 statistics = True 1507 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1508 statistics = False 1509 else: 1510 statistics = None 1511 1512 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1513 1514 def _parse_noprimaryindex(self) -> 
exp.Expression: 1515 self._match_text_seq("PRIMARY", "INDEX") 1516 return exp.NoPrimaryIndexProperty() 1517 1518 def _parse_oncommit(self) -> exp.Expression: 1519 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1520 return exp.OnCommitProperty() 1521 1522 def _parse_distkey(self) -> exp.Expression: 1523 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1524 1525 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1526 table = self._parse_table(schema=True) 1527 options = [] 1528 while self._match_texts(("INCLUDING", "EXCLUDING")): 1529 this = self._prev.text.upper() 1530 id_var = self._parse_id_var() 1531 1532 if not id_var: 1533 return None 1534 1535 options.append( 1536 self.expression( 1537 exp.Property, 1538 this=this, 1539 value=exp.Var(this=id_var.this.upper()), 1540 ) 1541 ) 1542 return self.expression(exp.LikeProperty, this=table, expressions=options) 1543 1544 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1545 return self.expression( 1546 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1547 ) 1548 1549 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1550 self._match(TokenType.EQ) 1551 return self.expression( 1552 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1553 ) 1554 1555 def _parse_returns(self) -> exp.Expression: 1556 value: t.Optional[exp.Expression] 1557 is_table = self._match(TokenType.TABLE) 1558 1559 if is_table: 1560 if self._match(TokenType.LT): 1561 value = self.expression( 1562 exp.Schema, 1563 this="TABLE", 1564 expressions=self._parse_csv(self._parse_struct_kwargs), 1565 ) 1566 if not self._match(TokenType.GT): 1567 self.raise_error("Expecting >") 1568 else: 1569 value = self._parse_schema(exp.Var(this="TABLE")) 1570 else: 1571 value = self._parse_types() 1572 1573 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1574 1575 def _parse_temporary(self, 
global_=False) -> exp.Expression: 1576 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1577 return self.expression(exp.TemporaryProperty, global_=global_) 1578 1579 def _parse_describe(self) -> exp.Expression: 1580 kind = self._match_set(self.CREATABLES) and self._prev.text 1581 this = self._parse_table() 1582 1583 return self.expression(exp.Describe, this=this, kind=kind) 1584 1585 def _parse_insert(self) -> exp.Expression: 1586 overwrite = self._match(TokenType.OVERWRITE) 1587 local = self._match(TokenType.LOCAL) 1588 alternative = None 1589 1590 if self._match_text_seq("DIRECTORY"): 1591 this: t.Optional[exp.Expression] = self.expression( 1592 exp.Directory, 1593 this=self._parse_var_or_string(), 1594 local=local, 1595 row_format=self._parse_row_format(match_row=True), 1596 ) 1597 else: 1598 if self._match(TokenType.OR): 1599 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1600 1601 self._match(TokenType.INTO) 1602 self._match(TokenType.TABLE) 1603 this = self._parse_table(schema=True) 1604 1605 return self.expression( 1606 exp.Insert, 1607 this=this, 1608 exists=self._parse_exists(), 1609 partition=self._parse_partition(), 1610 expression=self._parse_ddl_select(), 1611 conflict=self._parse_on_conflict(), 1612 returning=self._parse_returning(), 1613 overwrite=overwrite, 1614 alternative=alternative, 1615 ) 1616 1617 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1618 conflict = self._match_text_seq("ON", "CONFLICT") 1619 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1620 1621 if not (conflict or duplicate): 1622 return None 1623 1624 nothing = None 1625 expressions = None 1626 key = None 1627 constraint = None 1628 1629 if conflict: 1630 if self._match_text_seq("ON", "CONSTRAINT"): 1631 constraint = self._parse_id_var() 1632 else: 1633 key = self._parse_csv(self._parse_value) 1634 1635 self._match_text_seq("DO") 1636 if self._match_text_seq("NOTHING"): 1637 nothing = True 1638 else: 1639 
self._match(TokenType.UPDATE) 1640 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1641 1642 return self.expression( 1643 exp.OnConflict, 1644 duplicate=duplicate, 1645 expressions=expressions, 1646 nothing=nothing, 1647 key=key, 1648 constraint=constraint, 1649 ) 1650 1651 def _parse_returning(self) -> t.Optional[exp.Expression]: 1652 if not self._match(TokenType.RETURNING): 1653 return None 1654 1655 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1656 1657 def _parse_row(self) -> t.Optional[exp.Expression]: 1658 if not self._match(TokenType.FORMAT): 1659 return None 1660 return self._parse_row_format() 1661 1662 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1663 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1664 return None 1665 1666 if self._match_text_seq("SERDE"): 1667 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1668 1669 self._match_text_seq("DELIMITED") 1670 1671 kwargs = {} 1672 1673 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1674 kwargs["fields"] = self._parse_string() 1675 if self._match_text_seq("ESCAPED", "BY"): 1676 kwargs["escaped"] = self._parse_string() 1677 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1678 kwargs["collection_items"] = self._parse_string() 1679 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1680 kwargs["map_keys"] = self._parse_string() 1681 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1682 kwargs["lines"] = self._parse_string() 1683 if self._match_text_seq("NULL", "DEFINED", "AS"): 1684 kwargs["null"] = self._parse_string() 1685 1686 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1687 1688 def _parse_load_data(self) -> exp.Expression: 1689 local = self._match(TokenType.LOCAL) 1690 self._match_text_seq("INPATH") 1691 inpath = self._parse_string() 1692 overwrite = 
self._match(TokenType.OVERWRITE) 1693 self._match_pair(TokenType.INTO, TokenType.TABLE) 1694 1695 return self.expression( 1696 exp.LoadData, 1697 this=self._parse_table(schema=True), 1698 local=local, 1699 overwrite=overwrite, 1700 inpath=inpath, 1701 partition=self._parse_partition(), 1702 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1703 serde=self._match_text_seq("SERDE") and self._parse_string(), 1704 ) 1705 1706 def _parse_delete(self) -> exp.Expression: 1707 self._match(TokenType.FROM) 1708 1709 return self.expression( 1710 exp.Delete, 1711 this=self._parse_table(), 1712 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1713 where=self._parse_where(), 1714 returning=self._parse_returning(), 1715 ) 1716 1717 def _parse_update(self) -> exp.Expression: 1718 return self.expression( 1719 exp.Update, 1720 **{ # type: ignore 1721 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1722 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1723 "from": self._parse_from(), 1724 "where": self._parse_where(), 1725 "returning": self._parse_returning(), 1726 }, 1727 ) 1728 1729 def _parse_uncache(self) -> exp.Expression: 1730 if not self._match(TokenType.TABLE): 1731 self.raise_error("Expecting TABLE after UNCACHE") 1732 1733 return self.expression( 1734 exp.Uncache, 1735 exists=self._parse_exists(), 1736 this=self._parse_table(schema=True), 1737 ) 1738 1739 def _parse_cache(self) -> exp.Expression: 1740 lazy = self._match(TokenType.LAZY) 1741 self._match(TokenType.TABLE) 1742 table = self._parse_table(schema=True) 1743 options = [] 1744 1745 if self._match(TokenType.OPTIONS): 1746 self._match_l_paren() 1747 k = self._parse_string() 1748 self._match(TokenType.EQ) 1749 v = self._parse_string() 1750 options = [k, v] 1751 self._match_r_paren() 1752 1753 self._match(TokenType.ALIAS) 1754 return self.expression( 1755 exp.Cache, 1756 this=table, 1757 lazy=lazy, 1758 
options=options, 1759 expression=self._parse_select(nested=True), 1760 ) 1761 1762 def _parse_partition(self) -> t.Optional[exp.Expression]: 1763 if not self._match(TokenType.PARTITION): 1764 return None 1765 1766 return self.expression( 1767 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1768 ) 1769 1770 def _parse_value(self) -> exp.Expression: 1771 if self._match(TokenType.L_PAREN): 1772 expressions = self._parse_csv(self._parse_conjunction) 1773 self._match_r_paren() 1774 return self.expression(exp.Tuple, expressions=expressions) 1775 1776 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1777 # Source: https://prestodb.io/docs/current/sql/values.html 1778 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1779 1780 def _parse_select( 1781 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1782 ) -> t.Optional[exp.Expression]: 1783 cte = self._parse_with() 1784 if cte: 1785 this = self._parse_statement() 1786 1787 if not this: 1788 self.raise_error("Failed to parse any statement following CTE") 1789 return cte 1790 1791 if "with" in this.arg_types: 1792 this.set("with", cte) 1793 else: 1794 self.raise_error(f"{this.key} does not support CTE") 1795 this = cte 1796 elif self._match(TokenType.SELECT): 1797 comments = self._prev_comments 1798 1799 kind = ( 1800 self._match(TokenType.ALIAS) 1801 and self._match_texts(("STRUCT", "VALUE")) 1802 and self._prev.text 1803 ) 1804 hint = self._parse_hint() 1805 all_ = self._match(TokenType.ALL) 1806 distinct = self._match(TokenType.DISTINCT) 1807 1808 if distinct: 1809 distinct = self.expression( 1810 exp.Distinct, 1811 on=self._parse_value() if self._match(TokenType.ON) else None, 1812 ) 1813 1814 if all_ and distinct: 1815 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1816 1817 limit = self._parse_limit(top=True) 1818 expressions = self._parse_csv(self._parse_expression) 1819 1820 this = 
self.expression( 1821 exp.Select, 1822 kind=kind, 1823 hint=hint, 1824 distinct=distinct, 1825 expressions=expressions, 1826 limit=limit, 1827 ) 1828 this.comments = comments 1829 1830 into = self._parse_into() 1831 if into: 1832 this.set("into", into) 1833 1834 from_ = self._parse_from() 1835 if from_: 1836 this.set("from", from_) 1837 1838 self._parse_query_modifiers(this) 1839 elif (table or nested) and self._match(TokenType.L_PAREN): 1840 this = self._parse_table() if table else self._parse_select(nested=True) 1841 self._parse_query_modifiers(this) 1842 this = self._parse_set_operations(this) 1843 self._match_r_paren() 1844 1845 # early return so that subquery unions aren't parsed again 1846 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1847 # Union ALL should be a property of the top select node, not the subquery 1848 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1849 elif self._match(TokenType.VALUES): 1850 this = self.expression( 1851 exp.Values, 1852 expressions=self._parse_csv(self._parse_value), 1853 alias=self._parse_table_alias(), 1854 ) 1855 else: 1856 this = None 1857 1858 return self._parse_set_operations(this) 1859 1860 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]: 1861 if not skip_with_token and not self._match(TokenType.WITH): 1862 return None 1863 1864 comments = self._prev_comments 1865 recursive = self._match(TokenType.RECURSIVE) 1866 1867 expressions = [] 1868 while True: 1869 expressions.append(self._parse_cte()) 1870 1871 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1872 break 1873 else: 1874 self._match(TokenType.WITH) 1875 1876 return self.expression( 1877 exp.With, comments=comments, expressions=expressions, recursive=recursive 1878 ) 1879 1880 def _parse_cte(self) -> exp.Expression: 1881 alias = self._parse_table_alias() 1882 if not alias or not alias.this: 1883 self.raise_error("Expected CTE to have alias") 1884 1885 self._match(TokenType.ALIAS) 1886 
1887 return self.expression( 1888 exp.CTE, 1889 this=self._parse_wrapped(self._parse_statement), 1890 alias=alias, 1891 ) 1892 1893 def _parse_table_alias( 1894 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1895 ) -> t.Optional[exp.Expression]: 1896 any_token = self._match(TokenType.ALIAS) 1897 alias = ( 1898 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 1899 or self._parse_string_as_identifier() 1900 ) 1901 1902 index = self._index 1903 if self._match(TokenType.L_PAREN): 1904 columns = self._parse_csv(self._parse_function_parameter) 1905 self._match_r_paren() if columns else self._retreat(index) 1906 else: 1907 columns = None 1908 1909 if not alias and not columns: 1910 return None 1911 1912 return self.expression(exp.TableAlias, this=alias, columns=columns) 1913 1914 def _parse_subquery( 1915 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1916 ) -> exp.Expression: 1917 return self.expression( 1918 exp.Subquery, 1919 this=this, 1920 pivots=self._parse_pivots(), 1921 alias=self._parse_table_alias() if parse_alias else None, 1922 ) 1923 1924 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1925 if not isinstance(this, self.MODIFIABLES): 1926 return 1927 1928 table = isinstance(this, exp.Table) 1929 1930 while True: 1931 join = self._parse_join() 1932 if join: 1933 this.append("joins", join) 1934 1935 lateral = None 1936 if not join: 1937 lateral = self._parse_lateral() 1938 if lateral: 1939 this.append("laterals", lateral) 1940 1941 comma = None if table else self._match(TokenType.COMMA) 1942 if comma: 1943 this.args["from"].append("expressions", self._parse_table()) 1944 1945 if not (lateral or join or comma): 1946 break 1947 1948 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1949 expression = parser(self) 1950 1951 if expression: 1952 this.set(key, expression) 1953 1954 def _parse_hint(self) -> t.Optional[exp.Expression]: 1955 if 
self._match(TokenType.HINT): 1956 hints = self._parse_csv(self._parse_function) 1957 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1958 self.raise_error("Expected */ after HINT") 1959 return self.expression(exp.Hint, expressions=hints) 1960 1961 return None 1962 1963 def _parse_into(self) -> t.Optional[exp.Expression]: 1964 if not self._match(TokenType.INTO): 1965 return None 1966 1967 temp = self._match(TokenType.TEMPORARY) 1968 unlogged = self._match(TokenType.UNLOGGED) 1969 self._match(TokenType.TABLE) 1970 1971 return self.expression( 1972 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1973 ) 1974 1975 def _parse_from(self) -> t.Optional[exp.Expression]: 1976 if not self._match(TokenType.FROM): 1977 return None 1978 1979 return self.expression( 1980 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1981 ) 1982 1983 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1984 if not self._match(TokenType.MATCH_RECOGNIZE): 1985 return None 1986 1987 self._match_l_paren() 1988 1989 partition = self._parse_partition_by() 1990 order = self._parse_order() 1991 measures = ( 1992 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 1993 ) 1994 1995 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1996 rows = exp.Var(this="ONE ROW PER MATCH") 1997 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1998 text = "ALL ROWS PER MATCH" 1999 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2000 text += f" SHOW EMPTY MATCHES" 2001 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2002 text += f" OMIT EMPTY MATCHES" 2003 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2004 text += f" WITH UNMATCHED ROWS" 2005 rows = exp.Var(this=text) 2006 else: 2007 rows = None 2008 2009 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2010 text = "AFTER MATCH SKIP" 2011 if self._match_text_seq("PAST", "LAST", "ROW"): 2012 text += f" PAST 
LAST ROW" 2013 elif self._match_text_seq("TO", "NEXT", "ROW"): 2014 text += f" TO NEXT ROW" 2015 elif self._match_text_seq("TO", "FIRST"): 2016 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2017 elif self._match_text_seq("TO", "LAST"): 2018 text += f" TO LAST {self._advance_any().text}" # type: ignore 2019 after = exp.Var(this=text) 2020 else: 2021 after = None 2022 2023 if self._match_text_seq("PATTERN"): 2024 self._match_l_paren() 2025 2026 if not self._curr: 2027 self.raise_error("Expecting )", self._curr) 2028 2029 paren = 1 2030 start = self._curr 2031 2032 while self._curr and paren > 0: 2033 if self._curr.token_type == TokenType.L_PAREN: 2034 paren += 1 2035 if self._curr.token_type == TokenType.R_PAREN: 2036 paren -= 1 2037 end = self._prev 2038 self._advance() 2039 if paren > 0: 2040 self.raise_error("Expecting )", self._curr) 2041 pattern = exp.Var(this=self._find_sql(start, end)) 2042 else: 2043 pattern = None 2044 2045 define = ( 2046 self._parse_csv( 2047 lambda: self.expression( 2048 exp.Alias, 2049 alias=self._parse_id_var(any_token=True), 2050 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2051 ) 2052 ) 2053 if self._match_text_seq("DEFINE") 2054 else None 2055 ) 2056 2057 self._match_r_paren() 2058 2059 return self.expression( 2060 exp.MatchRecognize, 2061 partition_by=partition, 2062 order=order, 2063 measures=measures, 2064 rows=rows, 2065 after=after, 2066 pattern=pattern, 2067 define=define, 2068 alias=self._parse_table_alias(), 2069 ) 2070 2071 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2072 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2073 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2074 2075 if outer_apply or cross_apply: 2076 this = self._parse_select(table=True) 2077 view = None 2078 outer = not cross_apply 2079 elif self._match(TokenType.LATERAL): 2080 this = self._parse_select(table=True) 2081 view = self._match(TokenType.VIEW) 2082 outer = 
self._match(TokenType.OUTER) 2083 else: 2084 return None 2085 2086 if not this: 2087 this = self._parse_function() or self._parse_id_var(any_token=False) 2088 while self._match(TokenType.DOT): 2089 this = exp.Dot( 2090 this=this, 2091 expression=self._parse_function() or self._parse_id_var(any_token=False), 2092 ) 2093 2094 table_alias: t.Optional[exp.Expression] 2095 2096 if view: 2097 table = self._parse_id_var(any_token=False) 2098 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2099 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2100 else: 2101 table_alias = self._parse_table_alias() 2102 2103 expression = self.expression( 2104 exp.Lateral, 2105 this=this, 2106 view=view, 2107 outer=outer, 2108 alias=table_alias, 2109 ) 2110 2111 return expression 2112 2113 def _parse_join_side_and_kind( 2114 self, 2115 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2116 return ( 2117 self._match(TokenType.NATURAL) and self._prev, 2118 self._match_set(self.JOIN_SIDES) and self._prev, 2119 self._match_set(self.JOIN_KINDS) and self._prev, 2120 ) 2121 2122 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2123 index = self._index 2124 natural, side, kind = self._parse_join_side_and_kind() 2125 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2126 join = self._match(TokenType.JOIN) 2127 2128 if not skip_join_token and not join: 2129 self._retreat(index) 2130 kind = None 2131 natural = None 2132 side = None 2133 2134 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2135 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2136 2137 if not skip_join_token and not join and not outer_apply and not cross_apply: 2138 return None 2139 2140 if outer_apply: 2141 side = Token(TokenType.LEFT, "LEFT") 2142 2143 kwargs: t.Dict[ 2144 str, t.Optional[exp.Expression] | bool | str | 
t.List[t.Optional[exp.Expression]] 2145 ] = {"this": self._parse_table()} 2146 2147 if natural: 2148 kwargs["natural"] = True 2149 if side: 2150 kwargs["side"] = side.text 2151 if kind: 2152 kwargs["kind"] = kind.text 2153 if hint: 2154 kwargs["hint"] = hint 2155 2156 if self._match(TokenType.ON): 2157 kwargs["on"] = self._parse_conjunction() 2158 elif self._match(TokenType.USING): 2159 kwargs["using"] = self._parse_wrapped_id_vars() 2160 2161 return self.expression(exp.Join, **kwargs) # type: ignore 2162 2163 def _parse_index(self) -> exp.Expression: 2164 index = self._parse_id_var() 2165 self._match(TokenType.ON) 2166 self._match(TokenType.TABLE) # hive 2167 2168 return self.expression( 2169 exp.Index, 2170 this=index, 2171 table=self.expression(exp.Table, this=self._parse_id_var()), 2172 columns=self._parse_expression(), 2173 ) 2174 2175 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2176 unique = self._match(TokenType.UNIQUE) 2177 primary = self._match_text_seq("PRIMARY") 2178 amp = self._match_text_seq("AMP") 2179 if not self._match(TokenType.INDEX): 2180 return None 2181 index = self._parse_id_var() 2182 columns = None 2183 if self._match(TokenType.L_PAREN, advance=False): 2184 columns = self._parse_wrapped_csv(self._parse_column) 2185 return self.expression( 2186 exp.Index, 2187 this=index, 2188 columns=columns, 2189 unique=unique, 2190 primary=primary, 2191 amp=amp, 2192 ) 2193 2194 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2195 catalog = None 2196 db = None 2197 2198 table = ( 2199 (not schema and self._parse_function()) 2200 or self._parse_id_var(any_token=False) 2201 or self._parse_string_as_identifier() 2202 ) 2203 2204 while self._match(TokenType.DOT): 2205 if catalog: 2206 # This allows nesting the table in arbitrarily many dot expressions if needed 2207 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2208 else: 2209 catalog = db 2210 db = table 2211 table = 
self._parse_id_var() 2212 2213 if not table: 2214 self.raise_error(f"Expected table name but got {self._curr}") 2215 2216 return self.expression( 2217 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2218 ) 2219 2220 def _parse_table( 2221 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2222 ) -> t.Optional[exp.Expression]: 2223 lateral = self._parse_lateral() 2224 2225 if lateral: 2226 return lateral 2227 2228 unnest = self._parse_unnest() 2229 2230 if unnest: 2231 return unnest 2232 2233 values = self._parse_derived_table_values() 2234 2235 if values: 2236 return values 2237 2238 subquery = self._parse_select(table=True) 2239 2240 if subquery: 2241 if not subquery.args.get("pivots"): 2242 subquery.set("pivots", self._parse_pivots()) 2243 return subquery 2244 2245 this = self._parse_table_parts(schema=schema) 2246 2247 if schema: 2248 return self._parse_schema(this=this) 2249 2250 if self.alias_post_tablesample: 2251 table_sample = self._parse_table_sample() 2252 2253 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2254 2255 if alias: 2256 this.set("alias", alias) 2257 2258 if not this.args.get("pivots"): 2259 this.set("pivots", self._parse_pivots()) 2260 2261 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2262 this.set( 2263 "hints", 2264 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2265 ) 2266 self._match_r_paren() 2267 2268 if not self.alias_post_tablesample: 2269 table_sample = self._parse_table_sample() 2270 2271 if table_sample: 2272 table_sample.set("this", this) 2273 this = table_sample 2274 2275 return this 2276 2277 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2278 if not self._match(TokenType.UNNEST): 2279 return None 2280 2281 expressions = self._parse_wrapped_csv(self._parse_column) 2282 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2283 alias = self._parse_table_alias() 
2284 2285 if alias and self.unnest_column_only: 2286 if alias.args.get("columns"): 2287 self.raise_error("Unexpected extra column alias in unnest.") 2288 alias.set("columns", [alias.this]) 2289 alias.set("this", None) 2290 2291 offset = None 2292 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2293 self._match(TokenType.ALIAS) 2294 offset = self._parse_id_var() or exp.Identifier(this="offset") 2295 2296 return self.expression( 2297 exp.Unnest, 2298 expressions=expressions, 2299 ordinality=ordinality, 2300 alias=alias, 2301 offset=offset, 2302 ) 2303 2304 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2305 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2306 if not is_derived and not self._match(TokenType.VALUES): 2307 return None 2308 2309 expressions = self._parse_csv(self._parse_value) 2310 2311 if is_derived: 2312 self._match_r_paren() 2313 2314 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2315 2316 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2317 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2318 as_modifier and self._match_text_seq("USING", "SAMPLE") 2319 ): 2320 return None 2321 2322 bucket_numerator = None 2323 bucket_denominator = None 2324 bucket_field = None 2325 percent = None 2326 rows = None 2327 size = None 2328 seed = None 2329 2330 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2331 method = self._parse_var(tokens=(TokenType.ROW,)) 2332 2333 self._match(TokenType.L_PAREN) 2334 2335 num = self._parse_number() 2336 2337 if self._match(TokenType.BUCKET): 2338 bucket_numerator = self._parse_number() 2339 self._match(TokenType.OUT_OF) 2340 bucket_denominator = bucket_denominator = self._parse_number() 2341 self._match(TokenType.ON) 2342 bucket_field = self._parse_field() 2343 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2344 percent = num 2345 elif 
self._match(TokenType.ROWS): 2346 rows = num 2347 else: 2348 size = num 2349 2350 self._match(TokenType.R_PAREN) 2351 2352 if self._match(TokenType.L_PAREN): 2353 method = self._parse_var() 2354 seed = self._match(TokenType.COMMA) and self._parse_number() 2355 self._match_r_paren() 2356 elif self._match_texts(("SEED", "REPEATABLE")): 2357 seed = self._parse_wrapped(self._parse_number) 2358 2359 return self.expression( 2360 exp.TableSample, 2361 method=method, 2362 bucket_numerator=bucket_numerator, 2363 bucket_denominator=bucket_denominator, 2364 bucket_field=bucket_field, 2365 percent=percent, 2366 rows=rows, 2367 size=size, 2368 seed=seed, 2369 kind=kind, 2370 ) 2371 2372 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2373 return list(iter(self._parse_pivot, None)) 2374 2375 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2376 index = self._index 2377 2378 if self._match(TokenType.PIVOT): 2379 unpivot = False 2380 elif self._match(TokenType.UNPIVOT): 2381 unpivot = True 2382 else: 2383 return None 2384 2385 expressions = [] 2386 field = None 2387 2388 if not self._match(TokenType.L_PAREN): 2389 self._retreat(index) 2390 return None 2391 2392 if unpivot: 2393 expressions = self._parse_csv(self._parse_column) 2394 else: 2395 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2396 2397 if not expressions: 2398 self.raise_error("Failed to parse PIVOT's aggregation list") 2399 2400 if not self._match(TokenType.FOR): 2401 self.raise_error("Expecting FOR") 2402 2403 value = self._parse_column() 2404 2405 if not self._match(TokenType.IN): 2406 self.raise_error("Expecting IN") 2407 2408 field = self._parse_in(value) 2409 2410 self._match_r_paren() 2411 2412 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2413 2414 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2415 pivot.set("alias", self._parse_table_alias()) 2416 2417 if not unpivot: 2418 names = 
self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2419 2420 columns: t.List[exp.Expression] = [] 2421 for col in pivot.args["field"].expressions: 2422 for name in names: 2423 if self.PREFIXED_PIVOT_COLUMNS: 2424 name = f"{name}_{col.alias_or_name}" if name else col.alias_or_name 2425 else: 2426 name = f"{col.alias_or_name}_{name}" if name else col.alias_or_name 2427 2428 columns.append(exp.to_identifier(name, quoted=self.QUOTED_PIVOT_COLUMNS)) 2429 2430 pivot.set("columns", columns) 2431 2432 return pivot 2433 2434 def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]: 2435 return [agg.alias for agg in pivot_columns] 2436 2437 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2438 if not skip_where_token and not self._match(TokenType.WHERE): 2439 return None 2440 2441 return self.expression( 2442 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2443 ) 2444 2445 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2446 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2447 return None 2448 2449 elements = defaultdict(list) 2450 2451 while True: 2452 expressions = self._parse_csv(self._parse_conjunction) 2453 if expressions: 2454 elements["expressions"].extend(expressions) 2455 2456 grouping_sets = self._parse_grouping_sets() 2457 if grouping_sets: 2458 elements["grouping_sets"].extend(grouping_sets) 2459 2460 rollup = None 2461 cube = None 2462 2463 with_ = self._match(TokenType.WITH) 2464 if self._match(TokenType.ROLLUP): 2465 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2466 elements["rollup"].extend(ensure_list(rollup)) 2467 2468 if self._match(TokenType.CUBE): 2469 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2470 elements["cube"].extend(ensure_list(cube)) 2471 2472 if not (expressions or grouping_sets or rollup or cube): 2473 break 2474 2475 return 
self.expression(exp.Group, **elements) # type: ignore 2476 2477 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2478 if not self._match(TokenType.GROUPING_SETS): 2479 return None 2480 2481 return self._parse_wrapped_csv(self._parse_grouping_set) 2482 2483 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2484 if self._match(TokenType.L_PAREN): 2485 grouping_set = self._parse_csv(self._parse_column) 2486 self._match_r_paren() 2487 return self.expression(exp.Tuple, expressions=grouping_set) 2488 2489 return self._parse_column() 2490 2491 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2492 if not skip_having_token and not self._match(TokenType.HAVING): 2493 return None 2494 return self.expression(exp.Having, this=self._parse_conjunction()) 2495 2496 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2497 if not self._match(TokenType.QUALIFY): 2498 return None 2499 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2500 2501 def _parse_order( 2502 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2503 ) -> t.Optional[exp.Expression]: 2504 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2505 return this 2506 2507 return self.expression( 2508 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2509 ) 2510 2511 def _parse_sort( 2512 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2513 ) -> t.Optional[exp.Expression]: 2514 if not self._match(token_type): 2515 return None 2516 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2517 2518 def _parse_ordered(self) -> exp.Expression: 2519 this = self._parse_conjunction() 2520 self._match(TokenType.ASC) 2521 is_desc = self._match(TokenType.DESC) 2522 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2523 is_nulls_last = self._match(TokenType.NULLS_LAST) 2524 desc = is_desc or False 2525 asc = not desc 2526 
nulls_first = is_nulls_first or False 2527 explicitly_null_ordered = is_nulls_first or is_nulls_last 2528 if ( 2529 not explicitly_null_ordered 2530 and ( 2531 (asc and self.null_ordering == "nulls_are_small") 2532 or (desc and self.null_ordering != "nulls_are_small") 2533 ) 2534 and self.null_ordering != "nulls_are_last" 2535 ): 2536 nulls_first = True 2537 2538 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2539 2540 def _parse_limit( 2541 self, this: t.Optional[exp.Expression] = None, top: bool = False 2542 ) -> t.Optional[exp.Expression]: 2543 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2544 limit_paren = self._match(TokenType.L_PAREN) 2545 limit_exp = self.expression( 2546 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2547 ) 2548 2549 if limit_paren: 2550 self._match_r_paren() 2551 2552 return limit_exp 2553 2554 if self._match(TokenType.FETCH): 2555 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2556 direction = self._prev.text if direction else "FIRST" 2557 2558 count = self._parse_number() 2559 percent = self._match(TokenType.PERCENT) 2560 2561 self._match_set((TokenType.ROW, TokenType.ROWS)) 2562 2563 only = self._match(TokenType.ONLY) 2564 with_ties = self._match_text_seq("WITH", "TIES") 2565 2566 if only and with_ties: 2567 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2568 2569 return self.expression( 2570 exp.Fetch, 2571 direction=direction, 2572 count=count, 2573 percent=percent, 2574 with_ties=with_ties, 2575 ) 2576 2577 return this 2578 2579 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2580 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2581 return this 2582 2583 count = self._parse_number() 2584 self._match_set((TokenType.ROW, TokenType.ROWS)) 2585 return self.expression(exp.Offset, this=this, expression=count) 2586 2587 def _parse_lock(self) -> 
t.Optional[exp.Expression]: 2588 if self._match_text_seq("FOR", "UPDATE"): 2589 return self.expression(exp.Lock, update=True) 2590 if self._match_text_seq("FOR", "SHARE"): 2591 return self.expression(exp.Lock, update=False) 2592 2593 return None 2594 2595 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2596 if not self._match_set(self.SET_OPERATIONS): 2597 return this 2598 2599 token_type = self._prev.token_type 2600 2601 if token_type == TokenType.UNION: 2602 expression = exp.Union 2603 elif token_type == TokenType.EXCEPT: 2604 expression = exp.Except 2605 else: 2606 expression = exp.Intersect 2607 2608 return self.expression( 2609 expression, 2610 this=this, 2611 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2612 expression=self._parse_set_operations(self._parse_select(nested=True)), 2613 ) 2614 2615 def _parse_expression(self) -> t.Optional[exp.Expression]: 2616 return self._parse_alias(self._parse_conjunction()) 2617 2618 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2619 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2620 2621 def _parse_equality(self) -> t.Optional[exp.Expression]: 2622 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2623 2624 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2625 return self._parse_tokens(self._parse_range, self.COMPARISON) 2626 2627 def _parse_range(self) -> t.Optional[exp.Expression]: 2628 this = self._parse_bitwise() 2629 negate = self._match(TokenType.NOT) 2630 2631 if self._match_set(self.RANGE_PARSERS): 2632 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2633 if not expression: 2634 return this 2635 2636 this = expression 2637 elif self._match(TokenType.ISNULL): 2638 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2639 2640 # Postgres supports ISNULL and NOTNULL for conditions. 
2641 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2642 if self._match(TokenType.NOTNULL): 2643 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2644 this = self.expression(exp.Not, this=this) 2645 2646 if negate: 2647 this = self.expression(exp.Not, this=this) 2648 2649 if self._match(TokenType.IS): 2650 this = self._parse_is(this) 2651 2652 return this 2653 2654 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2655 index = self._index - 1 2656 negate = self._match(TokenType.NOT) 2657 if self._match(TokenType.DISTINCT_FROM): 2658 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2659 return self.expression(klass, this=this, expression=self._parse_expression()) 2660 2661 expression = self._parse_null() or self._parse_boolean() 2662 if not expression: 2663 self._retreat(index) 2664 return None 2665 2666 this = self.expression(exp.Is, this=this, expression=expression) 2667 return self.expression(exp.Not, this=this) if negate else this 2668 2669 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2670 unnest = self._parse_unnest() 2671 if unnest: 2672 this = self.expression(exp.In, this=this, unnest=unnest) 2673 elif self._match(TokenType.L_PAREN): 2674 expressions = self._parse_csv(self._parse_select_or_expression) 2675 2676 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2677 this = self.expression(exp.In, this=this, query=expressions[0]) 2678 else: 2679 this = self.expression(exp.In, this=this, expressions=expressions) 2680 2681 self._match_r_paren() 2682 else: 2683 this = self.expression(exp.In, this=this, field=self._parse_field()) 2684 2685 return this 2686 2687 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2688 low = self._parse_bitwise() 2689 self._match(TokenType.AND) 2690 high = self._parse_bitwise() 2691 return self.expression(exp.Between, this=this, low=low, high=high) 2692 2693 def _parse_escape(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2694 if not self._match(TokenType.ESCAPE): 2695 return this 2696 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2697 2698 def _parse_interval(self) -> t.Optional[exp.Expression]: 2699 if not self._match(TokenType.INTERVAL): 2700 return None 2701 2702 this = self._parse_primary() or self._parse_term() 2703 unit = self._parse_function() or self._parse_var() 2704 2705 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2706 # each INTERVAL expression into this canonical form so it's easy to transpile 2707 if this and isinstance(this, exp.Literal): 2708 if this.is_number: 2709 this = exp.Literal.string(this.name) 2710 2711 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2712 parts = this.name.split() 2713 if not unit and len(parts) <= 2: 2714 this = exp.Literal.string(seq_get(parts, 0)) 2715 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2716 2717 return self.expression(exp.Interval, this=this, unit=unit) 2718 2719 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2720 this = self._parse_term() 2721 2722 while True: 2723 if self._match_set(self.BITWISE): 2724 this = self.expression( 2725 self.BITWISE[self._prev.token_type], 2726 this=this, 2727 expression=self._parse_term(), 2728 ) 2729 elif self._match_pair(TokenType.LT, TokenType.LT): 2730 this = self.expression( 2731 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2732 ) 2733 elif self._match_pair(TokenType.GT, TokenType.GT): 2734 this = self.expression( 2735 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2736 ) 2737 else: 2738 break 2739 2740 return this 2741 2742 def _parse_term(self) -> t.Optional[exp.Expression]: 2743 return self._parse_tokens(self._parse_factor, self.TERM) 2744 2745 def _parse_factor(self) -> t.Optional[exp.Expression]: 2746 return self._parse_tokens(self._parse_unary, self.FACTOR) 2747 2748 def 
_parse_unary(self) -> t.Optional[exp.Expression]: 2749 if self._match_set(self.UNARY_PARSERS): 2750 return self.UNARY_PARSERS[self._prev.token_type](self) 2751 return self._parse_at_time_zone(self._parse_type()) 2752 2753 def _parse_type(self) -> t.Optional[exp.Expression]: 2754 interval = self._parse_interval() 2755 if interval: 2756 return interval 2757 2758 index = self._index 2759 data_type = self._parse_types(check_func=True) 2760 this = self._parse_column() 2761 2762 if data_type: 2763 if isinstance(this, exp.Literal): 2764 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2765 if parser: 2766 return parser(self, this, data_type) 2767 return self.expression(exp.Cast, this=this, to=data_type) 2768 if not data_type.args.get("expressions"): 2769 self._retreat(index) 2770 return self._parse_column() 2771 return data_type 2772 2773 return this 2774 2775 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2776 index = self._index 2777 2778 prefix = self._match_text_seq("SYSUDTLIB", ".") 2779 2780 if not self._match_set(self.TYPE_TOKENS): 2781 return None 2782 2783 type_token = self._prev.token_type 2784 2785 if type_token == TokenType.PSEUDO_TYPE: 2786 return self.expression(exp.PseudoType, this=self._prev.text) 2787 2788 nested = type_token in self.NESTED_TYPE_TOKENS 2789 is_struct = type_token == TokenType.STRUCT 2790 expressions = None 2791 maybe_func = False 2792 2793 if self._match(TokenType.L_PAREN): 2794 if is_struct: 2795 expressions = self._parse_csv(self._parse_struct_kwargs) 2796 elif nested: 2797 expressions = self._parse_csv(self._parse_types) 2798 else: 2799 expressions = self._parse_csv(self._parse_conjunction) 2800 2801 if not expressions: 2802 self._retreat(index) 2803 return None 2804 2805 self._match_r_paren() 2806 maybe_func = True 2807 2808 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2809 this = exp.DataType( 2810 this=exp.DataType.Type.ARRAY, 2811 
expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2812 nested=True, 2813 ) 2814 2815 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2816 this = exp.DataType( 2817 this=exp.DataType.Type.ARRAY, 2818 expressions=[this], 2819 nested=True, 2820 ) 2821 2822 return this 2823 2824 if self._match(TokenType.L_BRACKET): 2825 self._retreat(index) 2826 return None 2827 2828 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2829 if nested and self._match(TokenType.LT): 2830 if is_struct: 2831 expressions = self._parse_csv(self._parse_struct_kwargs) 2832 else: 2833 expressions = self._parse_csv(self._parse_types) 2834 2835 if not self._match(TokenType.GT): 2836 self.raise_error("Expecting >") 2837 2838 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2839 values = self._parse_csv(self._parse_conjunction) 2840 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2841 2842 value: t.Optional[exp.Expression] = None 2843 if type_token in self.TIMESTAMPS: 2844 if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ: 2845 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2846 elif ( 2847 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2848 ): 2849 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2850 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2851 if type_token == TokenType.TIME: 2852 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2853 else: 2854 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2855 2856 maybe_func = maybe_func and value is None 2857 2858 if value is None: 2859 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2860 elif type_token == TokenType.INTERVAL: 2861 unit = self._parse_var() 2862 2863 if not unit: 2864 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 
2865 else: 2866 value = self.expression(exp.Interval, unit=unit) 2867 2868 if maybe_func and check_func: 2869 index2 = self._index 2870 peek = self._parse_string() 2871 2872 if not peek: 2873 self._retreat(index) 2874 return None 2875 2876 self._retreat(index2) 2877 2878 if value: 2879 return value 2880 2881 return exp.DataType( 2882 this=exp.DataType.Type[type_token.value.upper()], 2883 expressions=expressions, 2884 nested=nested, 2885 values=values, 2886 prefix=prefix, 2887 ) 2888 2889 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2890 index = self._index 2891 this = self._parse_id_var() 2892 self._match(TokenType.COLON) 2893 data_type = self._parse_types() 2894 2895 if not data_type: 2896 self._retreat(index) 2897 return self._parse_types() 2898 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2899 2900 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2901 if not self._match(TokenType.AT_TIME_ZONE): 2902 return this 2903 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2904 2905 def _parse_column(self) -> t.Optional[exp.Expression]: 2906 this = self._parse_field() 2907 if isinstance(this, exp.Identifier): 2908 this = self.expression(exp.Column, this=this) 2909 elif not this: 2910 return self._parse_bracket(this) 2911 this = self._parse_bracket(this) 2912 2913 while self._match_set(self.COLUMN_OPERATORS): 2914 op_token = self._prev.token_type 2915 op = self.COLUMN_OPERATORS.get(op_token) 2916 2917 if op_token == TokenType.DCOLON: 2918 field = self._parse_types() 2919 if not field: 2920 self.raise_error("Expected type") 2921 elif op: 2922 self._advance() 2923 value = self._prev.text 2924 field = ( 2925 exp.Literal.number(value) 2926 if self._prev.token_type == TokenType.NUMBER 2927 else exp.Literal.string(value) 2928 ) 2929 else: 2930 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2931 2932 if isinstance(field, exp.Func): 
2933 # bigquery allows function calls like x.y.count(...) 2934 # SAFE.SUBSTR(...) 2935 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2936 this = self._replace_columns_with_dots(this) 2937 2938 if op: 2939 this = op(self, this, field) 2940 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2941 this = self.expression( 2942 exp.Column, 2943 this=field, 2944 table=this.this, 2945 db=this.args.get("table"), 2946 catalog=this.args.get("db"), 2947 ) 2948 else: 2949 this = self.expression(exp.Dot, this=this, expression=field) 2950 this = self._parse_bracket(this) 2951 2952 return this 2953 2954 def _parse_primary(self) -> t.Optional[exp.Expression]: 2955 if self._match_set(self.PRIMARY_PARSERS): 2956 token_type = self._prev.token_type 2957 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2958 2959 if token_type == TokenType.STRING: 2960 expressions = [primary] 2961 while self._match(TokenType.STRING): 2962 expressions.append(exp.Literal.string(self._prev.text)) 2963 if len(expressions) > 1: 2964 return self.expression(exp.Concat, expressions=expressions) 2965 return primary 2966 2967 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2968 return exp.Literal.number(f"0.{self._prev.text}") 2969 2970 if self._match(TokenType.L_PAREN): 2971 comments = self._prev_comments 2972 query = self._parse_select() 2973 2974 if query: 2975 expressions = [query] 2976 else: 2977 expressions = self._parse_csv( 2978 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2979 ) 2980 2981 this = seq_get(expressions, 0) 2982 self._parse_query_modifiers(this) 2983 2984 if isinstance(this, exp.Subqueryable): 2985 this = self._parse_set_operations( 2986 self._parse_subquery(this=this, parse_alias=False) 2987 ) 2988 elif len(expressions) > 1: 2989 this = self.expression(exp.Tuple, expressions=expressions) 2990 else: 2991 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 
2992 2993 self._match_r_paren() 2994 comments.extend(self._prev_comments) 2995 2996 if this and comments: 2997 this.comments = comments 2998 2999 return this 3000 3001 return None 3002 3003 def _parse_field( 3004 self, 3005 any_token: bool = False, 3006 tokens: t.Optional[t.Collection[TokenType]] = None, 3007 ) -> t.Optional[exp.Expression]: 3008 return ( 3009 self._parse_primary() 3010 or self._parse_function() 3011 or self._parse_id_var(any_token=any_token, tokens=tokens) 3012 ) 3013 3014 def _parse_function( 3015 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 3016 ) -> t.Optional[exp.Expression]: 3017 if not self._curr: 3018 return None 3019 3020 token_type = self._curr.token_type 3021 3022 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3023 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3024 3025 if not self._next or self._next.token_type != TokenType.L_PAREN: 3026 if token_type in self.NO_PAREN_FUNCTIONS: 3027 self._advance() 3028 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3029 3030 return None 3031 3032 if token_type not in self.FUNC_TOKENS: 3033 return None 3034 3035 this = self._curr.text 3036 upper = this.upper() 3037 self._advance(2) 3038 3039 parser = self.FUNCTION_PARSERS.get(upper) 3040 3041 if parser: 3042 this = parser(self) 3043 else: 3044 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3045 3046 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3047 this = self.expression(subquery_predicate, this=self._parse_select()) 3048 self._match_r_paren() 3049 return this 3050 3051 if functions is None: 3052 functions = self.FUNCTIONS 3053 3054 function = functions.get(upper) 3055 args = self._parse_csv(self._parse_lambda) 3056 3057 if function: 3058 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 3059 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
3060 if count_params(function) == 2: 3061 params = None 3062 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 3063 params = self._parse_csv(self._parse_lambda) 3064 3065 this = function(args, params) 3066 else: 3067 this = function(args) 3068 3069 self.validate_expression(this, args) 3070 else: 3071 this = self.expression(exp.Anonymous, this=this, expressions=args) 3072 3073 self._match_r_paren(this) 3074 return self._parse_window(this) 3075 3076 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3077 return self._parse_column_def(self._parse_id_var()) 3078 3079 def _parse_user_defined_function( 3080 self, kind: t.Optional[TokenType] = None 3081 ) -> t.Optional[exp.Expression]: 3082 this = self._parse_id_var() 3083 3084 while self._match(TokenType.DOT): 3085 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3086 3087 if not self._match(TokenType.L_PAREN): 3088 return this 3089 3090 expressions = self._parse_csv(self._parse_function_parameter) 3091 self._match_r_paren() 3092 return self.expression( 3093 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3094 ) 3095 3096 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3097 literal = self._parse_primary() 3098 if literal: 3099 return self.expression(exp.Introducer, this=token.text, expression=literal) 3100 3101 return self.expression(exp.Identifier, this=token.text) 3102 3103 def _parse_national(self, token: Token) -> exp.Expression: 3104 return self.expression(exp.National, this=exp.Literal.string(token.text)) 3105 3106 def _parse_session_parameter(self) -> exp.Expression: 3107 kind = None 3108 this = self._parse_id_var() or self._parse_primary() 3109 3110 if this and self._match(TokenType.DOT): 3111 kind = this.name 3112 this = self._parse_var() or self._parse_primary() 3113 3114 return self.expression(exp.SessionParameter, this=this, kind=kind) 3115 3116 def _parse_lambda(self) -> t.Optional[exp.Expression]: 3117 
index = self._index 3118 3119 if self._match(TokenType.L_PAREN): 3120 expressions = self._parse_csv(self._parse_id_var) 3121 3122 if not self._match(TokenType.R_PAREN): 3123 self._retreat(index) 3124 else: 3125 expressions = [self._parse_id_var()] 3126 3127 if self._match_set(self.LAMBDAS): 3128 return self.LAMBDAS[self._prev.token_type](self, expressions) 3129 3130 self._retreat(index) 3131 3132 this: t.Optional[exp.Expression] 3133 3134 if self._match(TokenType.DISTINCT): 3135 this = self.expression( 3136 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3137 ) 3138 else: 3139 this = self._parse_select_or_expression() 3140 3141 if isinstance(this, exp.EQ): 3142 left = this.this 3143 if isinstance(left, exp.Column): 3144 left.replace(exp.Var(this=left.text("this"))) 3145 3146 if self._match(TokenType.IGNORE_NULLS): 3147 this = self.expression(exp.IgnoreNulls, this=this) 3148 else: 3149 self._match(TokenType.RESPECT_NULLS) 3150 3151 return self._parse_limit(self._parse_order(this)) 3152 3153 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3154 index = self._index 3155 3156 try: 3157 if self._parse_select(nested=True): 3158 return this 3159 except Exception: 3160 pass 3161 finally: 3162 self._retreat(index) 3163 3164 if not self._match(TokenType.L_PAREN): 3165 return this 3166 3167 args = self._parse_csv( 3168 lambda: self._parse_constraint() 3169 or self._parse_column_def(self._parse_field(any_token=True)) 3170 ) 3171 self._match_r_paren() 3172 return self.expression(exp.Schema, this=this, expressions=args) 3173 3174 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3175 kind = self._parse_types() 3176 3177 if self._match_text_seq("FOR", "ORDINALITY"): 3178 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3179 3180 constraints = [] 3181 while True: 3182 constraint = self._parse_column_constraint() 3183 if not constraint: 3184 break 3185 
constraints.append(constraint) 3186 3187 if not kind and not constraints: 3188 return this 3189 3190 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3191 3192 def _parse_auto_increment(self) -> exp.Expression: 3193 start = None 3194 increment = None 3195 3196 if self._match(TokenType.L_PAREN, advance=False): 3197 args = self._parse_wrapped_csv(self._parse_bitwise) 3198 start = seq_get(args, 0) 3199 increment = seq_get(args, 1) 3200 elif self._match_text_seq("START"): 3201 start = self._parse_bitwise() 3202 self._match_text_seq("INCREMENT") 3203 increment = self._parse_bitwise() 3204 3205 if start and increment: 3206 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3207 3208 return exp.AutoIncrementColumnConstraint() 3209 3210 def _parse_compress(self) -> exp.Expression: 3211 if self._match(TokenType.L_PAREN, advance=False): 3212 return self.expression( 3213 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3214 ) 3215 3216 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3217 3218 def _parse_generated_as_identity(self) -> exp.Expression: 3219 if self._match(TokenType.BY_DEFAULT): 3220 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 3221 else: 3222 self._match_text_seq("ALWAYS") 3223 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3224 3225 self._match_text_seq("AS", "IDENTITY") 3226 if self._match(TokenType.L_PAREN): 3227 if self._match_text_seq("START", "WITH"): 3228 this.set("start", self._parse_bitwise()) 3229 if self._match_text_seq("INCREMENT", "BY"): 3230 this.set("increment", self._parse_bitwise()) 3231 if self._match_text_seq("MINVALUE"): 3232 this.set("minvalue", self._parse_bitwise()) 3233 if self._match_text_seq("MAXVALUE"): 3234 this.set("maxvalue", self._parse_bitwise()) 3235 3236 if self._match_text_seq("CYCLE"): 3237 this.set("cycle", True) 3238 elif 
self._match_text_seq("NO", "CYCLE"): 3239 this.set("cycle", False) 3240 3241 self._match_r_paren() 3242 3243 return this 3244 3245 def _parse_inline(self) -> t.Optional[exp.Expression]: 3246 self._match_text_seq("LENGTH") 3247 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3248 3249 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3250 if self._match_text_seq("NULL"): 3251 return self.expression(exp.NotNullColumnConstraint) 3252 if self._match_text_seq("CASESPECIFIC"): 3253 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3254 return None 3255 3256 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3257 if self._match(TokenType.CONSTRAINT): 3258 this = self._parse_id_var() 3259 else: 3260 this = None 3261 3262 if self._match_texts(self.CONSTRAINT_PARSERS): 3263 return self.expression( 3264 exp.ColumnConstraint, 3265 this=this, 3266 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3267 ) 3268 3269 return this 3270 3271 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3272 if not self._match(TokenType.CONSTRAINT): 3273 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3274 3275 this = self._parse_id_var() 3276 expressions = [] 3277 3278 while True: 3279 constraint = self._parse_unnamed_constraint() or self._parse_function() 3280 if not constraint: 3281 break 3282 expressions.append(constraint) 3283 3284 return self.expression(exp.Constraint, this=this, expressions=expressions) 3285 3286 def _parse_unnamed_constraint( 3287 self, constraints: t.Optional[t.Collection[str]] = None 3288 ) -> t.Optional[exp.Expression]: 3289 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3290 return None 3291 3292 constraint = self._prev.text.upper() 3293 if constraint not in self.CONSTRAINT_PARSERS: 3294 self.raise_error(f"No parser found for schema constraint {constraint}.") 3295 3296 return 
self.CONSTRAINT_PARSERS[constraint](self) 3297 3298 def _parse_unique(self) -> exp.Expression: 3299 if not self._match(TokenType.L_PAREN, advance=False): 3300 return self.expression(exp.UniqueColumnConstraint) 3301 return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 3302 3303 def _parse_key_constraint_options(self) -> t.List[str]: 3304 options = [] 3305 while True: 3306 if not self._curr: 3307 break 3308 3309 if self._match(TokenType.ON): 3310 action = None 3311 on = self._advance_any() and self._prev.text 3312 3313 if self._match(TokenType.NO_ACTION): 3314 action = "NO ACTION" 3315 elif self._match(TokenType.CASCADE): 3316 action = "CASCADE" 3317 elif self._match_pair(TokenType.SET, TokenType.NULL): 3318 action = "SET NULL" 3319 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3320 action = "SET DEFAULT" 3321 else: 3322 self.raise_error("Invalid key constraint") 3323 3324 options.append(f"ON {on} {action}") 3325 elif self._match_text_seq("NOT", "ENFORCED"): 3326 options.append("NOT ENFORCED") 3327 elif self._match_text_seq("DEFERRABLE"): 3328 options.append("DEFERRABLE") 3329 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3330 options.append("INITIALLY DEFERRED") 3331 elif self._match_text_seq("NORELY"): 3332 options.append("NORELY") 3333 elif self._match_text_seq("MATCH", "FULL"): 3334 options.append("MATCH FULL") 3335 else: 3336 break 3337 3338 return options 3339 3340 def _parse_references(self, match=True) -> t.Optional[exp.Expression]: 3341 if match and not self._match(TokenType.REFERENCES): 3342 return None 3343 3344 expressions = None 3345 this = self._parse_id_var() 3346 3347 if self._match(TokenType.L_PAREN, advance=False): 3348 expressions = self._parse_wrapped_id_vars() 3349 3350 options = self._parse_key_constraint_options() 3351 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3352 3353 def _parse_foreign_key(self) -> exp.Expression: 3354 expressions = 
self._parse_wrapped_id_vars() 3355 reference = self._parse_references() 3356 options = {} 3357 3358 while self._match(TokenType.ON): 3359 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3360 self.raise_error("Expected DELETE or UPDATE") 3361 3362 kind = self._prev.text.lower() 3363 3364 if self._match(TokenType.NO_ACTION): 3365 action = "NO ACTION" 3366 elif self._match(TokenType.SET): 3367 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3368 action = "SET " + self._prev.text.upper() 3369 else: 3370 self._advance() 3371 action = self._prev.text.upper() 3372 3373 options[kind] = action 3374 3375 return self.expression( 3376 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3377 ) 3378 3379 def _parse_primary_key(self) -> exp.Expression: 3380 desc = ( 3381 self._match_set((TokenType.ASC, TokenType.DESC)) 3382 and self._prev.token_type == TokenType.DESC 3383 ) 3384 3385 if not self._match(TokenType.L_PAREN, advance=False): 3386 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3387 3388 expressions = self._parse_wrapped_id_vars() 3389 options = self._parse_key_constraint_options() 3390 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3391 3392 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3393 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3394 return this 3395 3396 bracket_kind = self._prev.token_type 3397 expressions: t.List[t.Optional[exp.Expression]] 3398 3399 if self._match(TokenType.COLON): 3400 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3401 else: 3402 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3403 3404 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3405 if bracket_kind == TokenType.L_BRACE: 3406 this = self.expression(exp.Struct, expressions=expressions) 3407 elif not this or this.name.upper() 
== "ARRAY": 3408 this = self.expression(exp.Array, expressions=expressions) 3409 else: 3410 expressions = apply_index_offset(this, expressions, -self.index_offset) 3411 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3412 3413 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3414 self.raise_error("Expected ]") 3415 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3416 self.raise_error("Expected }") 3417 3418 this.comments = self._prev_comments 3419 return self._parse_bracket(this) 3420 3421 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3422 if self._match(TokenType.COLON): 3423 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3424 return this 3425 3426 def _parse_case(self) -> t.Optional[exp.Expression]: 3427 ifs = [] 3428 default = None 3429 3430 expression = self._parse_conjunction() 3431 3432 while self._match(TokenType.WHEN): 3433 this = self._parse_conjunction() 3434 self._match(TokenType.THEN) 3435 then = self._parse_conjunction() 3436 ifs.append(self.expression(exp.If, this=this, true=then)) 3437 3438 if self._match(TokenType.ELSE): 3439 default = self._parse_conjunction() 3440 3441 if not self._match(TokenType.END): 3442 self.raise_error("Expected END after CASE", self._prev) 3443 3444 return self._parse_window( 3445 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3446 ) 3447 3448 def _parse_if(self) -> t.Optional[exp.Expression]: 3449 if self._match(TokenType.L_PAREN): 3450 args = self._parse_csv(self._parse_conjunction) 3451 this = exp.If.from_arg_list(args) 3452 self.validate_expression(this, args) 3453 self._match_r_paren() 3454 else: 3455 condition = self._parse_conjunction() 3456 self._match(TokenType.THEN) 3457 true = self._parse_conjunction() 3458 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3459 self._match(TokenType.END) 3460 this = 
self.expression(exp.If, this=condition, true=true, false=false) 3461 3462 return self._parse_window(this) 3463 3464 def _parse_extract(self) -> exp.Expression: 3465 this = self._parse_function() or self._parse_var() or self._parse_type() 3466 3467 if self._match(TokenType.FROM): 3468 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3469 3470 if not self._match(TokenType.COMMA): 3471 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3472 3473 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3474 3475 def _parse_cast(self, strict: bool) -> exp.Expression: 3476 this = self._parse_conjunction() 3477 3478 if not self._match(TokenType.ALIAS): 3479 self.raise_error("Expected AS after CAST") 3480 3481 to = self._parse_types() 3482 3483 if not to: 3484 self.raise_error("Expected TYPE after CAST") 3485 elif to.this == exp.DataType.Type.CHAR: 3486 if self._match(TokenType.CHARACTER_SET): 3487 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3488 3489 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3490 3491 def _parse_string_agg(self) -> exp.Expression: 3492 expression: t.Optional[exp.Expression] 3493 3494 if self._match(TokenType.DISTINCT): 3495 args = self._parse_csv(self._parse_conjunction) 3496 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3497 else: 3498 args = self._parse_csv(self._parse_conjunction) 3499 expression = seq_get(args, 0) 3500 3501 index = self._index 3502 if not self._match(TokenType.R_PAREN): 3503 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3504 order = self._parse_order(this=expression) 3505 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3506 3507 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
3508 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3509 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3510 if not self._match(TokenType.WITHIN_GROUP): 3511 self._retreat(index) 3512 this = exp.GroupConcat.from_arg_list(args) 3513 self.validate_expression(this, args) 3514 return this 3515 3516 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3517 order = self._parse_order(this=expression) 3518 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3519 3520 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3521 to: t.Optional[exp.Expression] 3522 this = self._parse_bitwise() 3523 3524 if self._match(TokenType.USING): 3525 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3526 elif self._match(TokenType.COMMA): 3527 to = self._parse_bitwise() 3528 else: 3529 to = None 3530 3531 # Swap the argument order if needed to produce the correct AST 3532 if self.CONVERT_TYPE_FIRST: 3533 this, to = to, this 3534 3535 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3536 3537 def _parse_decode(self) -> t.Optional[exp.Expression]: 3538 """ 3539 There are generally two variants of the DECODE function: 3540 3541 - DECODE(bin, charset) 3542 - DECODE(expression, search, result [, search, result] ... [, default]) 3543 3544 The second variant will always be parsed into a CASE expression. Note that NULL 3545 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3546 instead of relying on pattern matching. 
3547 """ 3548 args = self._parse_csv(self._parse_conjunction) 3549 3550 if len(args) < 3: 3551 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3552 3553 expression, *expressions = args 3554 if not expression: 3555 return None 3556 3557 ifs = [] 3558 for search, result in zip(expressions[::2], expressions[1::2]): 3559 if not search or not result: 3560 return None 3561 3562 if isinstance(search, exp.Literal): 3563 ifs.append( 3564 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3565 ) 3566 elif isinstance(search, exp.Null): 3567 ifs.append( 3568 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3569 ) 3570 else: 3571 cond = exp.or_( 3572 exp.EQ(this=expression.copy(), expression=search), 3573 exp.and_( 3574 exp.Is(this=expression.copy(), expression=exp.Null()), 3575 exp.Is(this=search.copy(), expression=exp.Null()), 3576 ), 3577 ) 3578 ifs.append(exp.If(this=cond, true=result)) 3579 3580 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3581 3582 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3583 self._match_text_seq("KEY") 3584 key = self._parse_field() 3585 self._match(TokenType.COLON) 3586 self._match_text_seq("VALUE") 3587 value = self._parse_field() 3588 if not key and not value: 3589 return None 3590 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3591 3592 def _parse_json_object(self) -> exp.Expression: 3593 expressions = self._parse_csv(self._parse_json_key_value) 3594 3595 null_handling = None 3596 if self._match_text_seq("NULL", "ON", "NULL"): 3597 null_handling = "NULL ON NULL" 3598 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3599 null_handling = "ABSENT ON NULL" 3600 3601 unique_keys = None 3602 if self._match_text_seq("WITH", "UNIQUE"): 3603 unique_keys = True 3604 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3605 unique_keys = False 3606 3607 self._match_text_seq("KEYS") 3608 
3609 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3610 format_json = self._match_text_seq("FORMAT", "JSON") 3611 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3612 3613 return self.expression( 3614 exp.JSONObject, 3615 expressions=expressions, 3616 null_handling=null_handling, 3617 unique_keys=unique_keys, 3618 return_type=return_type, 3619 format_json=format_json, 3620 encoding=encoding, 3621 ) 3622 3623 def _parse_logarithm(self) -> exp.Expression: 3624 # Default argument order is base, expression 3625 args = self._parse_csv(self._parse_range) 3626 3627 if len(args) > 1: 3628 if not self.LOG_BASE_FIRST: 3629 args.reverse() 3630 return exp.Log.from_arg_list(args) 3631 3632 return self.expression( 3633 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3634 ) 3635 3636 def _parse_match_against(self) -> exp.Expression: 3637 expressions = self._parse_csv(self._parse_column) 3638 3639 self._match_text_seq(")", "AGAINST", "(") 3640 3641 this = self._parse_string() 3642 3643 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3644 modifier = "IN NATURAL LANGUAGE MODE" 3645 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3646 modifier = f"{modifier} WITH QUERY EXPANSION" 3647 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3648 modifier = "IN BOOLEAN MODE" 3649 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3650 modifier = "WITH QUERY EXPANSION" 3651 else: 3652 modifier = None 3653 3654 return self.expression( 3655 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3656 ) 3657 3658 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3659 args = self._parse_csv(self._parse_bitwise) 3660 3661 if self._match(TokenType.IN): 3662 return self.expression( 3663 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3664 ) 3665 3666 if haystack_first: 3667 haystack = seq_get(args, 0) 3668 needle = seq_get(args, 1) 3669 else: 
3670 needle = seq_get(args, 0) 3671 haystack = seq_get(args, 1) 3672 3673 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3674 3675 self.validate_expression(this, args) 3676 3677 return this 3678 3679 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3680 args = self._parse_csv(self._parse_table) 3681 return exp.JoinHint(this=func_name.upper(), expressions=args) 3682 3683 def _parse_substring(self) -> exp.Expression: 3684 # Postgres supports the form: substring(string [from int] [for int]) 3685 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3686 3687 args = self._parse_csv(self._parse_bitwise) 3688 3689 if self._match(TokenType.FROM): 3690 args.append(self._parse_bitwise()) 3691 if self._match(TokenType.FOR): 3692 args.append(self._parse_bitwise()) 3693 3694 this = exp.Substring.from_arg_list(args) 3695 self.validate_expression(this, args) 3696 3697 return this 3698 3699 def _parse_trim(self) -> exp.Expression: 3700 # https://www.w3resource.com/sql/character-functions/trim.php 3701 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3702 3703 position = None 3704 collation = None 3705 3706 if self._match_set(self.TRIM_TYPES): 3707 position = self._prev.text.upper() 3708 3709 expression = self._parse_term() 3710 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3711 this = self._parse_term() 3712 else: 3713 this = expression 3714 expression = None 3715 3716 if self._match(TokenType.COLLATE): 3717 collation = self._parse_term() 3718 3719 return self.expression( 3720 exp.Trim, 3721 this=this, 3722 position=position, 3723 expression=expression, 3724 collation=collation, 3725 ) 3726 3727 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3728 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3729 3730 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3731 return self._parse_window(self._parse_id_var(), 
alias=True) 3732 3733 def _parse_window( 3734 self, this: t.Optional[exp.Expression], alias: bool = False 3735 ) -> t.Optional[exp.Expression]: 3736 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3737 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3738 self._match_r_paren() 3739 3740 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3741 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3742 if self._match(TokenType.WITHIN_GROUP): 3743 order = self._parse_wrapped(self._parse_order) 3744 this = self.expression(exp.WithinGroup, this=this, expression=order) 3745 3746 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3747 # Some dialects choose to implement and some do not. 3748 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3749 3750 # There is some code above in _parse_lambda that handles 3751 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3752 3753 # The below changes handle 3754 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3755 3756 # Oracle allows both formats 3757 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3758 # and Snowflake chose to do the same for familiarity 3759 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3760 if self._match(TokenType.IGNORE_NULLS): 3761 this = self.expression(exp.IgnoreNulls, this=this) 3762 elif self._match(TokenType.RESPECT_NULLS): 3763 this = self.expression(exp.RespectNulls, this=this) 3764 3765 # bigquery select from window x AS (partition by ...) 
3766 if alias: 3767 self._match(TokenType.ALIAS) 3768 elif not self._match(TokenType.OVER): 3769 return this 3770 3771 if not self._match(TokenType.L_PAREN): 3772 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3773 3774 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3775 partition = self._parse_partition_by() 3776 order = self._parse_order() 3777 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3778 3779 if kind: 3780 self._match(TokenType.BETWEEN) 3781 start = self._parse_window_spec() 3782 self._match(TokenType.AND) 3783 end = self._parse_window_spec() 3784 3785 spec = self.expression( 3786 exp.WindowSpec, 3787 kind=kind, 3788 start=start["value"], 3789 start_side=start["side"], 3790 end=end["value"], 3791 end_side=end["side"], 3792 ) 3793 else: 3794 spec = None 3795 3796 self._match_r_paren() 3797 3798 return self.expression( 3799 exp.Window, 3800 this=this, 3801 partition_by=partition, 3802 order=order, 3803 spec=spec, 3804 alias=window_alias, 3805 ) 3806 3807 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3808 self._match(TokenType.BETWEEN) 3809 3810 return { 3811 "value": ( 3812 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3813 ) 3814 or self._parse_bitwise(), 3815 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3816 } 3817 3818 def _parse_alias( 3819 self, this: t.Optional[exp.Expression], explicit: bool = False 3820 ) -> t.Optional[exp.Expression]: 3821 any_token = self._match(TokenType.ALIAS) 3822 3823 if explicit and not any_token: 3824 return this 3825 3826 if self._match(TokenType.L_PAREN): 3827 aliases = self.expression( 3828 exp.Aliases, 3829 this=this, 3830 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3831 ) 3832 self._match_r_paren(aliases) 3833 return aliases 3834 3835 alias = self._parse_id_var(any_token) 3836 3837 if 
alias: 3838 return self.expression(exp.Alias, this=this, alias=alias) 3839 3840 return this 3841 3842 def _parse_id_var( 3843 self, 3844 any_token: bool = True, 3845 tokens: t.Optional[t.Collection[TokenType]] = None, 3846 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3847 ) -> t.Optional[exp.Expression]: 3848 identifier = self._parse_identifier() 3849 3850 if identifier: 3851 return identifier 3852 3853 prefix = "" 3854 3855 if prefix_tokens: 3856 while self._match_set(prefix_tokens): 3857 prefix += self._prev.text 3858 3859 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3860 quoted = self._prev.token_type == TokenType.STRING 3861 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3862 3863 return None 3864 3865 def _parse_string(self) -> t.Optional[exp.Expression]: 3866 if self._match(TokenType.STRING): 3867 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3868 return self._parse_placeholder() 3869 3870 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 3871 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 3872 3873 def _parse_number(self) -> t.Optional[exp.Expression]: 3874 if self._match(TokenType.NUMBER): 3875 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3876 return self._parse_placeholder() 3877 3878 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3879 if self._match(TokenType.IDENTIFIER): 3880 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3881 return self._parse_placeholder() 3882 3883 def _parse_var( 3884 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 3885 ) -> t.Optional[exp.Expression]: 3886 if ( 3887 (any_token and self._advance_any()) 3888 or self._match(TokenType.VAR) 3889 or (self._match_set(tokens) if tokens else False) 3890 ): 3891 return self.expression(exp.Var, this=self._prev.text) 3892 return 
self._parse_placeholder() 3893 3894 def _advance_any(self) -> t.Optional[Token]: 3895 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3896 self._advance() 3897 return self._prev 3898 return None 3899 3900 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3901 return self._parse_var() or self._parse_string() 3902 3903 def _parse_null(self) -> t.Optional[exp.Expression]: 3904 if self._match(TokenType.NULL): 3905 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3906 return None 3907 3908 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3909 if self._match(TokenType.TRUE): 3910 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3911 if self._match(TokenType.FALSE): 3912 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3913 return None 3914 3915 def _parse_star(self) -> t.Optional[exp.Expression]: 3916 if self._match(TokenType.STAR): 3917 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3918 return None 3919 3920 def _parse_parameter(self) -> exp.Expression: 3921 wrapped = self._match(TokenType.L_BRACE) 3922 this = self._parse_var() or self._parse_primary() 3923 self._match(TokenType.R_BRACE) 3924 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3925 3926 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3927 if self._match_set(self.PLACEHOLDER_PARSERS): 3928 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3929 if placeholder: 3930 return placeholder 3931 self._advance(-1) 3932 return None 3933 3934 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3935 if not self._match(TokenType.EXCEPT): 3936 return None 3937 if self._match(TokenType.L_PAREN, advance=False): 3938 return self._parse_wrapped_csv(self._parse_column) 3939 return self._parse_csv(self._parse_column) 3940 3941 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3942 if not self._match(TokenType.REPLACE): 3943 return None 
3944 if self._match(TokenType.L_PAREN, advance=False): 3945 return self._parse_wrapped_csv(self._parse_expression) 3946 return self._parse_csv(self._parse_expression) 3947 3948 def _parse_csv( 3949 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3950 ) -> t.List[t.Optional[exp.Expression]]: 3951 parse_result = parse_method() 3952 items = [parse_result] if parse_result is not None else [] 3953 3954 while self._match(sep): 3955 if parse_result and self._prev_comments: 3956 parse_result.comments = self._prev_comments 3957 3958 parse_result = parse_method() 3959 if parse_result is not None: 3960 items.append(parse_result) 3961 3962 return items 3963 3964 def _parse_tokens( 3965 self, parse_method: t.Callable, expressions: t.Dict 3966 ) -> t.Optional[exp.Expression]: 3967 this = parse_method() 3968 3969 while self._match_set(expressions): 3970 this = self.expression( 3971 expressions[self._prev.token_type], 3972 this=this, 3973 comments=self._prev_comments, 3974 expression=parse_method(), 3975 ) 3976 3977 return this 3978 3979 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3980 return self._parse_wrapped_csv(self._parse_id_var) 3981 3982 def _parse_wrapped_csv( 3983 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3984 ) -> t.List[t.Optional[exp.Expression]]: 3985 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3986 3987 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3988 self._match_l_paren() 3989 parse_result = parse_method() 3990 self._match_r_paren() 3991 return parse_result 3992 3993 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3994 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 3995 3996 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3997 return self._parse_set_operations( 3998 self._parse_select(nested=True, parse_subquery_alias=False) 3999 ) 4000 4001 def _parse_transaction(self) -> 
exp.Expression: 4002 this = None 4003 if self._match_texts(self.TRANSACTION_KIND): 4004 this = self._prev.text 4005 4006 self._match_texts({"TRANSACTION", "WORK"}) 4007 4008 modes = [] 4009 while True: 4010 mode = [] 4011 while self._match(TokenType.VAR): 4012 mode.append(self._prev.text) 4013 4014 if mode: 4015 modes.append(" ".join(mode)) 4016 if not self._match(TokenType.COMMA): 4017 break 4018 4019 return self.expression(exp.Transaction, this=this, modes=modes) 4020 4021 def _parse_commit_or_rollback(self) -> exp.Expression: 4022 chain = None 4023 savepoint = None 4024 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4025 4026 self._match_texts({"TRANSACTION", "WORK"}) 4027 4028 if self._match_text_seq("TO"): 4029 self._match_text_seq("SAVEPOINT") 4030 savepoint = self._parse_id_var() 4031 4032 if self._match(TokenType.AND): 4033 chain = not self._match_text_seq("NO") 4034 self._match_text_seq("CHAIN") 4035 4036 if is_rollback: 4037 return self.expression(exp.Rollback, savepoint=savepoint) 4038 return self.expression(exp.Commit, chain=chain) 4039 4040 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4041 if not self._match_text_seq("ADD"): 4042 return None 4043 4044 self._match(TokenType.COLUMN) 4045 exists_column = self._parse_exists(not_=True) 4046 expression = self._parse_column_def(self._parse_field(any_token=True)) 4047 4048 if expression: 4049 expression.set("exists", exists_column) 4050 4051 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4052 if self._match_texts(("FIRST", "AFTER")): 4053 position = self._prev.text 4054 column_position = self.expression( 4055 exp.ColumnPosition, this=self._parse_column(), position=position 4056 ) 4057 expression.set("position", column_position) 4058 4059 return expression 4060 4061 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4062 drop = self._match(TokenType.DROP) and self._parse_drop() 4063 if drop and not isinstance(drop, exp.Command): 
4064 drop.set("kind", drop.args.get("kind", "COLUMN")) 4065 return drop 4066 4067 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4068 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4069 return self.expression( 4070 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4071 ) 4072 4073 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4074 this = None 4075 kind = self._prev.token_type 4076 4077 if kind == TokenType.CONSTRAINT: 4078 this = self._parse_id_var() 4079 4080 if self._match_text_seq("CHECK"): 4081 expression = self._parse_wrapped(self._parse_conjunction) 4082 enforced = self._match_text_seq("ENFORCED") 4083 4084 return self.expression( 4085 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4086 ) 4087 4088 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4089 expression = self._parse_foreign_key() 4090 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4091 expression = self._parse_primary_key() 4092 else: 4093 expression = None 4094 4095 return self.expression(exp.AddConstraint, this=this, expression=expression) 4096 4097 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4098 index = self._index - 1 4099 4100 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4101 return self._parse_csv(self._parse_add_constraint) 4102 4103 self._retreat(index) 4104 return self._parse_csv(self._parse_add_column) 4105 4106 def _parse_alter_table_alter(self) -> exp.Expression: 4107 self._match(TokenType.COLUMN) 4108 column = self._parse_field(any_token=True) 4109 4110 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4111 return self.expression(exp.AlterColumn, this=column, drop=True) 4112 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4113 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4114 4115 self._match_text_seq("SET", 
"DATA") 4116 return self.expression( 4117 exp.AlterColumn, 4118 this=column, 4119 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4120 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4121 using=self._match(TokenType.USING) and self._parse_conjunction(), 4122 ) 4123 4124 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4125 index = self._index - 1 4126 4127 partition_exists = self._parse_exists() 4128 if self._match(TokenType.PARTITION, advance=False): 4129 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4130 4131 self._retreat(index) 4132 return self._parse_csv(self._parse_drop_column) 4133 4134 def _parse_alter_table_rename(self) -> exp.Expression: 4135 self._match_text_seq("TO") 4136 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4137 4138 def _parse_alter(self) -> t.Optional[exp.Expression]: 4139 start = self._prev 4140 4141 if not self._match(TokenType.TABLE): 4142 return self._parse_as_command(start) 4143 4144 exists = self._parse_exists() 4145 this = self._parse_table(schema=True) 4146 4147 if self._next: 4148 self._advance() 4149 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4150 4151 if parser: 4152 actions = ensure_list(parser(self)) 4153 4154 if not self._curr: 4155 return self.expression( 4156 exp.AlterTable, 4157 this=this, 4158 exists=exists, 4159 actions=actions, 4160 ) 4161 return self._parse_as_command(start) 4162 4163 def _parse_merge(self) -> exp.Expression: 4164 self._match(TokenType.INTO) 4165 target = self._parse_table() 4166 4167 self._match(TokenType.USING) 4168 using = self._parse_table() 4169 4170 self._match(TokenType.ON) 4171 on = self._parse_conjunction() 4172 4173 whens = [] 4174 while self._match(TokenType.WHEN): 4175 matched = not self._match(TokenType.NOT) 4176 self._match_text_seq("MATCHED") 4177 source = ( 4178 False 4179 if self._match_text_seq("BY", "TARGET") 4180 else 
self._match_text_seq("BY", "SOURCE") 4181 ) 4182 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4183 4184 self._match(TokenType.THEN) 4185 4186 if self._match(TokenType.INSERT): 4187 _this = self._parse_star() 4188 if _this: 4189 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4190 else: 4191 then = self.expression( 4192 exp.Insert, 4193 this=self._parse_value(), 4194 expression=self._match(TokenType.VALUES) and self._parse_value(), 4195 ) 4196 elif self._match(TokenType.UPDATE): 4197 expressions = self._parse_star() 4198 if expressions: 4199 then = self.expression(exp.Update, expressions=expressions) 4200 else: 4201 then = self.expression( 4202 exp.Update, 4203 expressions=self._match(TokenType.SET) 4204 and self._parse_csv(self._parse_equality), 4205 ) 4206 elif self._match(TokenType.DELETE): 4207 then = self.expression(exp.Var, this=self._prev.text) 4208 else: 4209 then = None 4210 4211 whens.append( 4212 self.expression( 4213 exp.When, 4214 matched=matched, 4215 source=source, 4216 condition=condition, 4217 then=then, 4218 ) 4219 ) 4220 4221 return self.expression( 4222 exp.Merge, 4223 this=target, 4224 using=using, 4225 on=on, 4226 expressions=whens, 4227 ) 4228 4229 def _parse_show(self) -> t.Optional[exp.Expression]: 4230 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4231 if parser: 4232 return parser(self) 4233 self._advance() 4234 return self.expression(exp.Show, this=self._prev.text.upper()) 4235 4236 def _parse_set_item_assignment( 4237 self, kind: t.Optional[str] = None 4238 ) -> t.Optional[exp.Expression]: 4239 index = self._index 4240 4241 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4242 return self._parse_set_transaction(global_=kind == "GLOBAL") 4243 4244 left = self._parse_primary() or self._parse_id_var() 4245 4246 if not self._match_texts(("=", "TO")): 4247 self._retreat(index) 4248 return None 4249 4250 right = 
self._parse_statement() or self._parse_id_var() 4251 this = self.expression( 4252 exp.EQ, 4253 this=left, 4254 expression=right, 4255 ) 4256 4257 return self.expression( 4258 exp.SetItem, 4259 this=this, 4260 kind=kind, 4261 ) 4262 4263 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4264 self._match_text_seq("TRANSACTION") 4265 characteristics = self._parse_csv( 4266 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4267 ) 4268 return self.expression( 4269 exp.SetItem, 4270 expressions=characteristics, 4271 kind="TRANSACTION", 4272 **{"global": global_}, # type: ignore 4273 ) 4274 4275 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4276 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4277 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4278 4279 def _parse_set(self) -> exp.Expression: 4280 index = self._index 4281 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4282 4283 if self._curr: 4284 self._retreat(index) 4285 return self._parse_as_command(self._prev) 4286 4287 return set_ 4288 4289 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4290 for option in options: 4291 if self._match_text_seq(*option.split(" ")): 4292 return exp.Var(this=option) 4293 return None 4294 4295 def _parse_as_command(self, start: Token) -> exp.Command: 4296 while self._curr: 4297 self._advance() 4298 text = self._find_sql(start, self._prev) 4299 size = len(start.text) 4300 return exp.Command(this=text[:size], expression=text[size:]) 4301 4302 def _find_parser( 4303 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4304 ) -> t.Optional[t.Callable]: 4305 if not self._curr: 4306 return None 4307 4308 index = self._index 4309 this = [] 4310 while True: 4311 # The current token might be multiple words 4312 curr = self._curr.text.upper() 4313 key = curr.split(" ") 4314 this.append(curr) 4315 
self._advance() 4316 result, trie = in_trie(trie, key) 4317 if result == 0: 4318 break 4319 if result == 2: 4320 subparser = parsers[" ".join(this)] 4321 return subparser 4322 self._retreat(index) 4323 return None 4324 4325 def _match(self, token_type, advance=True): 4326 if not self._curr: 4327 return None 4328 4329 if self._curr.token_type == token_type: 4330 if advance: 4331 self._advance() 4332 return True 4333 4334 return None 4335 4336 def _match_set(self, types, advance=True): 4337 if not self._curr: 4338 return None 4339 4340 if self._curr.token_type in types: 4341 if advance: 4342 self._advance() 4343 return True 4344 4345 return None 4346 4347 def _match_pair(self, token_type_a, token_type_b, advance=True): 4348 if not self._curr or not self._next: 4349 return None 4350 4351 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4352 if advance: 4353 self._advance(2) 4354 return True 4355 4356 return None 4357 4358 def _match_l_paren(self, expression=None): 4359 if not self._match(TokenType.L_PAREN): 4360 self.raise_error("Expecting (") 4361 if expression and self._prev_comments: 4362 expression.comments = self._prev_comments 4363 4364 def _match_r_paren(self, expression=None): 4365 if not self._match(TokenType.R_PAREN): 4366 self.raise_error("Expecting )") 4367 if expression and self._prev_comments: 4368 expression.comments = self._prev_comments 4369 4370 def _match_texts(self, texts, advance=True): 4371 if self._curr and self._curr.text.upper() in texts: 4372 if advance: 4373 self._advance() 4374 return True 4375 return False 4376 4377 def _match_text_seq(self, *texts, advance=True): 4378 index = self._index 4379 for text in texts: 4380 if self._curr and self._curr.text.upper() == text: 4381 self._advance() 4382 else: 4383 self._retreat(index) 4384 return False 4385 4386 if not advance: 4387 self._retreat(index) 4388 4389 return True 4390 4391 def _replace_columns_with_dots(self, this): 4392 if isinstance(this, exp.Dot): 
4393 exp.replace_children(this, self._replace_columns_with_dots) 4394 elif isinstance(this, exp.Column): 4395 exp.replace_children(this, self._replace_columns_with_dots) 4396 table = this.args.get("table") 4397 this = ( 4398 self.expression(exp.Dot, this=table, expression=this.this) 4399 if table 4400 else self.expression(exp.Var, this=this.name) 4401 ) 4402 elif isinstance(this, exp.Identifier): 4403 this = self.expression(exp.Var, this=this.name) 4404 return this 4405 4406 def _replace_lambda(self, node, lambda_variables): 4407 for column in node.find_all(exp.Column): 4408 if column.parts[0].name in lambda_variables: 4409 dot_or_id = column.to_dot() if column.table else column.this 4410 parent = column.parent 4411 4412 while isinstance(parent, exp.Dot): 4413 if not isinstance(parent.parent, exp.Dot): 4414 parent.replace(dot_or_id) 4415 break 4416 parent = parent.parent 4417 else: 4418 if column is node: 4419 node = dot_or_id 4420 else: 4421 column.replace(dot_or_id) 4422 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Initializes the parser; see the class docstring for the meaning of each setting.

        Args:
            error_level: how to react to parse errors (effective default: IMMEDIATE).
            error_message_context: number of characters of query context shown in errors.
            index_offset: array index offset (0-based vs 1-based dialects).
            unnest_column_only: whether UNNEST aliases apply to columns only.
            alias_post_tablesample: whether the table alias comes after TABLESAMPLE.
            max_errors: maximum number of error messages in a raised ParseError.
            null_ordering: default null ordering method if not explicitly set.
        """
        # NOTE(review): the class docstring advertises ErrorLevel.RAISE as the
        # default, but the effective default here is IMMEDIATE — confirm which
        # one is intended and align the docs/code.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # reset() initializes the mutable per-parse state (tokens, errors, cursor).
        self.reset()
816 def parse( 817 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 818 ) -> t.List[t.Optional[exp.Expression]]: 819 """ 820 Parses a list of tokens and returns a list of syntax trees, one tree 821 per parsed SQL statement. 822 823 Args: 824 raw_tokens: the list of tokens. 825 sql: the original SQL string, used to produce helpful debug messages. 826 827 Returns: 828 The list of syntax trees. 829 """ 830 return self._parse( 831 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 832 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
834 def parse_into( 835 self, 836 expression_types: exp.IntoType, 837 raw_tokens: t.List[Token], 838 sql: t.Optional[str] = None, 839 ) -> t.List[t.Optional[exp.Expression]]: 840 """ 841 Parses a list of tokens into a given Expression type. If a collection of Expression 842 types is given instead, this method will try to parse the token list into each one 843 of them, stopping at the first for which the parsing succeeds. 844 845 Args: 846 expression_types: the expression type(s) to try and parse the token list into. 847 raw_tokens: the list of tokens. 848 sql: the original SQL string, used to produce helpful debug messages. 849 850 Returns: 851 The target Expression. 852 """ 853 errors = [] 854 for expression_type in ensure_collection(expression_types): 855 parser = self.EXPRESSION_PARSERS.get(expression_type) 856 if not parser: 857 raise TypeError(f"No parser registered for {expression_type}") 858 try: 859 return self._parse(parser, raw_tokens, sql) 860 except ParseError as e: 861 e.errors[0]["into_expression"] = expression_type 862 errors.append(e) 863 raise ParseError( 864 f"Failed to parse into {expression_types}", 865 errors=merge_errors(errors), 866 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
902 def check_errors(self) -> None: 903 """ 904 Logs or raises any found errors, depending on the chosen error level setting. 905 """ 906 if self.error_level == ErrorLevel.WARN: 907 for error in self.errors: 908 logger.error(str(error)) 909 elif self.error_level == ErrorLevel.RAISE and self.errors: 910 raise ParseError( 911 concat_messages(self.errors, self.max_errors), 912 errors=merge_errors(self.errors), 913 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: the error description.
            token: the token to anchor the error location to; defaults to the
                current token, then the previous one, then an empty token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        # Slice a window of the original SQL around the offending token so the
        # message shows context before/after, with the token itself underlined
        # (\033[4m ... \033[0m are ANSI underline on/off codes).
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises on the spot; other levels accumulate errors so that
        # check_errors() can report them all at the end of the parse.
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach and consume any comments collected from the preceding token,
        # so they are not re-attached to subsequently built expressions.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        # Explicitly passed comments take precedence over token-derived ones.
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
966 def validate_expression( 967 self, expression: exp.Expression, args: t.Optional[t.List] = None 968 ) -> None: 969 """ 970 Validates an already instantiated expression, making sure that all its mandatory arguments 971 are set. 972 973 Args: 974 expression: the expression to validate. 975 args: an optional list of items that was used to instantiate the expression, if it's a Func. 976 """ 977 if self.error_level == ErrorLevel.IGNORE: 978 return 979 980 for error_message in expression.error_messages(args): 981 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.