sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import (
    apply_index_offset,
    count_params,
    ensure_collection,
    ensure_list,
    seq_get,
)
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")


def parse_var_map(args):
    """Build an `exp.VarMap` from a flat [key1, value1, key2, value2, ...] list.

    The even-indexed arguments become the keys array and the odd-indexed ones
    the values array. Assumes `args` has even length — an odd-length list
    would raise IndexError on the trailing key.
    """
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


class _Parser(type):
    """Metaclass for Parser subclasses.

    Precomputes the SHOW/SET keyword tries from each class's SHOW_PARSERS and
    SET_PARSERS tables, and for dialects that declare INTEGER_DIVISION = False
    remaps the `/` operator to `exp.FloatDiv` in the FACTOR table.
    """

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        # Tries allow multi-word SHOW/SET commands to be matched token by token.
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        if not klass.INTEGER_DIVISION:
            klass.FACTOR = {**klass.FACTOR, TokenType.SLASH: exp.FloatDiv}

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
63 Default: 3 64 null_ordering: Indicates the default null ordering method to use if not explicitly set. 65 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 66 Default: "nulls_are_small" 67 """ 68 69 FUNCTIONS: t.Dict[str, t.Callable] = { 70 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 71 "DATE_TO_DATE_STR": lambda args: exp.Cast( 72 this=seq_get(args, 0), 73 to=exp.DataType(this=exp.DataType.Type.TEXT), 74 ), 75 "TIME_TO_TIME_STR": lambda args: exp.Cast( 76 this=seq_get(args, 0), 77 to=exp.DataType(this=exp.DataType.Type.TEXT), 78 ), 79 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 80 this=exp.Cast( 81 this=seq_get(args, 0), 82 to=exp.DataType(this=exp.DataType.Type.TEXT), 83 ), 84 start=exp.Literal.number(1), 85 length=exp.Literal.number(10), 86 ), 87 "VAR_MAP": parse_var_map, 88 "IFNULL": exp.Coalesce.from_arg_list, 89 } 90 91 NO_PAREN_FUNCTIONS = { 92 TokenType.CURRENT_DATE: exp.CurrentDate, 93 TokenType.CURRENT_DATETIME: exp.CurrentDate, 94 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 95 } 96 97 NESTED_TYPE_TOKENS = { 98 TokenType.ARRAY, 99 TokenType.MAP, 100 TokenType.STRUCT, 101 TokenType.NULLABLE, 102 } 103 104 TYPE_TOKENS = { 105 TokenType.BOOLEAN, 106 TokenType.TINYINT, 107 TokenType.SMALLINT, 108 TokenType.INT, 109 TokenType.BIGINT, 110 TokenType.FLOAT, 111 TokenType.DOUBLE, 112 TokenType.CHAR, 113 TokenType.NCHAR, 114 TokenType.VARCHAR, 115 TokenType.NVARCHAR, 116 TokenType.TEXT, 117 TokenType.MEDIUMTEXT, 118 TokenType.LONGTEXT, 119 TokenType.MEDIUMBLOB, 120 TokenType.LONGBLOB, 121 TokenType.BINARY, 122 TokenType.VARBINARY, 123 TokenType.JSON, 124 TokenType.JSONB, 125 TokenType.INTERVAL, 126 TokenType.TIME, 127 TokenType.TIMESTAMP, 128 TokenType.TIMESTAMPTZ, 129 TokenType.TIMESTAMPLTZ, 130 TokenType.DATETIME, 131 TokenType.DATE, 132 TokenType.DECIMAL, 133 TokenType.UUID, 134 TokenType.GEOGRAPHY, 135 TokenType.GEOMETRY, 136 TokenType.HLLSKETCH, 137 TokenType.HSTORE, 138 
TokenType.PSEUDO_TYPE, 139 TokenType.SUPER, 140 TokenType.SERIAL, 141 TokenType.SMALLSERIAL, 142 TokenType.BIGSERIAL, 143 TokenType.XML, 144 TokenType.UNIQUEIDENTIFIER, 145 TokenType.MONEY, 146 TokenType.SMALLMONEY, 147 TokenType.ROWVERSION, 148 TokenType.IMAGE, 149 TokenType.VARIANT, 150 TokenType.OBJECT, 151 TokenType.INET, 152 *NESTED_TYPE_TOKENS, 153 } 154 155 SUBQUERY_PREDICATES = { 156 TokenType.ANY: exp.Any, 157 TokenType.ALL: exp.All, 158 TokenType.EXISTS: exp.Exists, 159 TokenType.SOME: exp.Any, 160 } 161 162 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 163 164 DB_CREATABLES = { 165 TokenType.DATABASE, 166 TokenType.SCHEMA, 167 TokenType.TABLE, 168 TokenType.VIEW, 169 } 170 171 CREATABLES = { 172 TokenType.COLUMN, 173 TokenType.FUNCTION, 174 TokenType.INDEX, 175 TokenType.PROCEDURE, 176 *DB_CREATABLES, 177 } 178 179 ID_VAR_TOKENS = { 180 TokenType.VAR, 181 TokenType.ANTI, 182 TokenType.APPLY, 183 TokenType.AUTO_INCREMENT, 184 TokenType.BEGIN, 185 TokenType.BOTH, 186 TokenType.BUCKET, 187 TokenType.CACHE, 188 TokenType.CASCADE, 189 TokenType.COLLATE, 190 TokenType.COMMAND, 191 TokenType.COMMENT, 192 TokenType.COMMIT, 193 TokenType.COMPOUND, 194 TokenType.CONSTRAINT, 195 TokenType.CURRENT_TIME, 196 TokenType.DEFAULT, 197 TokenType.DELETE, 198 TokenType.DESCRIBE, 199 TokenType.DIV, 200 TokenType.END, 201 TokenType.EXECUTE, 202 TokenType.ESCAPE, 203 TokenType.FALSE, 204 TokenType.FIRST, 205 TokenType.FILTER, 206 TokenType.FOLLOWING, 207 TokenType.FORMAT, 208 TokenType.IF, 209 TokenType.ISNULL, 210 TokenType.INTERVAL, 211 TokenType.LAZY, 212 TokenType.LEADING, 213 TokenType.LEFT, 214 TokenType.LOCAL, 215 TokenType.MATERIALIZED, 216 TokenType.MERGE, 217 TokenType.NATURAL, 218 TokenType.NEXT, 219 TokenType.OFFSET, 220 TokenType.ONLY, 221 TokenType.OPTIONS, 222 TokenType.ORDINALITY, 223 TokenType.PERCENT, 224 TokenType.PIVOT, 225 TokenType.PRECEDING, 226 TokenType.RANGE, 227 TokenType.REFERENCES, 228 TokenType.RIGHT, 229 TokenType.ROW, 
230 TokenType.ROWS, 231 TokenType.SEED, 232 TokenType.SEMI, 233 TokenType.SET, 234 TokenType.SHOW, 235 TokenType.SORTKEY, 236 TokenType.TEMPORARY, 237 TokenType.TOP, 238 TokenType.TRAILING, 239 TokenType.TRUE, 240 TokenType.UNBOUNDED, 241 TokenType.UNIQUE, 242 TokenType.UNLOGGED, 243 TokenType.UNPIVOT, 244 TokenType.VOLATILE, 245 TokenType.WINDOW, 246 *CREATABLES, 247 *SUBQUERY_PREDICATES, 248 *TYPE_TOKENS, 249 *NO_PAREN_FUNCTIONS, 250 } 251 252 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 253 TokenType.APPLY, 254 TokenType.LEFT, 255 TokenType.NATURAL, 256 TokenType.OFFSET, 257 TokenType.RIGHT, 258 TokenType.WINDOW, 259 } 260 261 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 262 263 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 264 265 FUNC_TOKENS = { 266 TokenType.COMMAND, 267 TokenType.CURRENT_DATE, 268 TokenType.CURRENT_DATETIME, 269 TokenType.CURRENT_TIMESTAMP, 270 TokenType.CURRENT_TIME, 271 TokenType.FILTER, 272 TokenType.FIRST, 273 TokenType.FORMAT, 274 TokenType.IDENTIFIER, 275 TokenType.INDEX, 276 TokenType.ISNULL, 277 TokenType.ILIKE, 278 TokenType.LIKE, 279 TokenType.MERGE, 280 TokenType.OFFSET, 281 TokenType.PRIMARY_KEY, 282 TokenType.REPLACE, 283 TokenType.ROW, 284 TokenType.UNNEST, 285 TokenType.VAR, 286 TokenType.LEFT, 287 TokenType.RIGHT, 288 TokenType.DATE, 289 TokenType.DATETIME, 290 TokenType.TABLE, 291 TokenType.TIMESTAMP, 292 TokenType.TIMESTAMPTZ, 293 TokenType.WINDOW, 294 *TYPE_TOKENS, 295 *SUBQUERY_PREDICATES, 296 } 297 298 CONJUNCTION = { 299 TokenType.AND: exp.And, 300 TokenType.OR: exp.Or, 301 } 302 303 EQUALITY = { 304 TokenType.EQ: exp.EQ, 305 TokenType.NEQ: exp.NEQ, 306 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 307 } 308 309 COMPARISON = { 310 TokenType.GT: exp.GT, 311 TokenType.GTE: exp.GTE, 312 TokenType.LT: exp.LT, 313 TokenType.LTE: exp.LTE, 314 } 315 316 BITWISE = { 317 TokenType.AMP: exp.BitwiseAnd, 318 TokenType.CARET: exp.BitwiseXor, 319 TokenType.PIPE: exp.BitwiseOr, 320 TokenType.DPIPE: exp.DPipe, 
321 } 322 323 TERM = { 324 TokenType.DASH: exp.Sub, 325 TokenType.PLUS: exp.Add, 326 TokenType.MOD: exp.Mod, 327 TokenType.COLLATE: exp.Collate, 328 } 329 330 FACTOR = { 331 TokenType.DIV: exp.IntDiv, 332 TokenType.LR_ARROW: exp.Distance, 333 TokenType.SLASH: exp.Div, 334 TokenType.STAR: exp.Mul, 335 } 336 337 TIMESTAMPS = { 338 TokenType.TIME, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.TIMESTAMPLTZ, 342 } 343 344 SET_OPERATIONS = { 345 TokenType.UNION, 346 TokenType.INTERSECT, 347 TokenType.EXCEPT, 348 } 349 350 JOIN_SIDES = { 351 TokenType.LEFT, 352 TokenType.RIGHT, 353 TokenType.FULL, 354 } 355 356 JOIN_KINDS = { 357 TokenType.INNER, 358 TokenType.OUTER, 359 TokenType.CROSS, 360 TokenType.SEMI, 361 TokenType.ANTI, 362 } 363 364 LAMBDAS = { 365 TokenType.ARROW: lambda self, expressions: self.expression( 366 exp.Lambda, 367 this=self._parse_conjunction().transform( 368 self._replace_lambda, {node.name for node in expressions} 369 ), 370 expressions=expressions, 371 ), 372 TokenType.FARROW: lambda self, expressions: self.expression( 373 exp.Kwarg, 374 this=exp.Var(this=expressions[0].name), 375 expression=self._parse_conjunction(), 376 ), 377 } 378 379 COLUMN_OPERATORS = { 380 TokenType.DOT: None, 381 TokenType.DCOLON: lambda self, this, to: self.expression( 382 exp.Cast, 383 this=this, 384 to=to, 385 ), 386 TokenType.ARROW: lambda self, this, path: self.expression( 387 exp.JSONExtract, 388 this=this, 389 expression=path, 390 ), 391 TokenType.DARROW: lambda self, this, path: self.expression( 392 exp.JSONExtractScalar, 393 this=this, 394 expression=path, 395 ), 396 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 397 exp.JSONBExtract, 398 this=this, 399 expression=path, 400 ), 401 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 402 exp.JSONBExtractScalar, 403 this=this, 404 expression=path, 405 ), 406 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 407 exp.JSONBContains, 408 this=this, 409 
expression=key, 410 ), 411 } 412 413 EXPRESSION_PARSERS = { 414 exp.Column: lambda self: self._parse_column(), 415 exp.DataType: lambda self: self._parse_types(), 416 exp.From: lambda self: self._parse_from(), 417 exp.Group: lambda self: self._parse_group(), 418 exp.Identifier: lambda self: self._parse_id_var(), 419 exp.Lateral: lambda self: self._parse_lateral(), 420 exp.Join: lambda self: self._parse_join(), 421 exp.Order: lambda self: self._parse_order(), 422 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 423 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 424 exp.Lambda: lambda self: self._parse_lambda(), 425 exp.Limit: lambda self: self._parse_limit(), 426 exp.Offset: lambda self: self._parse_offset(), 427 exp.TableAlias: lambda self: self._parse_table_alias(), 428 exp.Table: lambda self: self._parse_table(), 429 exp.Condition: lambda self: self._parse_conjunction(), 430 exp.Expression: lambda self: self._parse_statement(), 431 exp.Properties: lambda self: self._parse_properties(), 432 exp.Where: lambda self: self._parse_where(), 433 exp.Ordered: lambda self: self._parse_ordered(), 434 exp.Having: lambda self: self._parse_having(), 435 exp.With: lambda self: self._parse_with(), 436 exp.Window: lambda self: self._parse_named_window(), 437 exp.Qualify: lambda self: self._parse_qualify(), 438 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 439 } 440 441 STATEMENT_PARSERS = { 442 TokenType.ALTER: lambda self: self._parse_alter(), 443 TokenType.BEGIN: lambda self: self._parse_transaction(), 444 TokenType.CACHE: lambda self: self._parse_cache(), 445 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 446 TokenType.COMMENT: lambda self: self._parse_comment(), 447 TokenType.CREATE: lambda self: self._parse_create(), 448 TokenType.DELETE: lambda self: self._parse_delete(), 449 TokenType.DESC: lambda self: self._parse_describe(), 450 TokenType.DESCRIBE: lambda self: self._parse_describe(), 
451 TokenType.DROP: lambda self: self._parse_drop(), 452 TokenType.END: lambda self: self._parse_commit_or_rollback(), 453 TokenType.INSERT: lambda self: self._parse_insert(), 454 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 455 TokenType.MERGE: lambda self: self._parse_merge(), 456 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 457 TokenType.UNCACHE: lambda self: self._parse_uncache(), 458 TokenType.UPDATE: lambda self: self._parse_update(), 459 TokenType.USE: lambda self: self.expression( 460 exp.Use, 461 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 462 and exp.Var(this=self._prev.text), 463 this=self._parse_table(schema=False), 464 ), 465 } 466 467 UNARY_PARSERS = { 468 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 469 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 470 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 471 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 472 } 473 474 PRIMARY_PARSERS = { 475 TokenType.STRING: lambda self, token: self.expression( 476 exp.Literal, this=token.text, is_string=True 477 ), 478 TokenType.NUMBER: lambda self, token: self.expression( 479 exp.Literal, this=token.text, is_string=False 480 ), 481 TokenType.STAR: lambda self, _: self.expression( 482 exp.Star, 483 **{"except": self._parse_except(), "replace": self._parse_replace()}, 484 ), 485 TokenType.NULL: lambda self, _: self.expression(exp.Null), 486 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 487 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 488 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 489 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 490 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, 
this=token.text), 491 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 492 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 493 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 494 } 495 496 PLACEHOLDER_PARSERS = { 497 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 498 TokenType.PARAMETER: lambda self: self._parse_parameter(), 499 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 500 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 501 else None, 502 } 503 504 RANGE_PARSERS = { 505 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 506 TokenType.GLOB: lambda self, this: self._parse_escape( 507 self.expression(exp.Glob, this=this, expression=self._parse_bitwise()) 508 ), 509 TokenType.OVERLAPS: lambda self, this: self._parse_escape( 510 self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise()) 511 ), 512 TokenType.IN: lambda self, this: self._parse_in(this), 513 TokenType.IS: lambda self, this: self._parse_is(this), 514 TokenType.LIKE: lambda self, this: self._parse_escape( 515 self.expression(exp.Like, this=this, expression=self._parse_bitwise()) 516 ), 517 TokenType.ILIKE: lambda self, this: self._parse_escape( 518 self.expression(exp.ILike, this=this, expression=self._parse_bitwise()) 519 ), 520 TokenType.IRLIKE: lambda self, this: self.expression( 521 exp.RegexpILike, this=this, expression=self._parse_bitwise() 522 ), 523 TokenType.RLIKE: lambda self, this: self.expression( 524 exp.RegexpLike, this=this, expression=self._parse_bitwise() 525 ), 526 TokenType.SIMILAR_TO: lambda self, this: self.expression( 527 exp.SimilarTo, this=this, expression=self._parse_bitwise() 528 ), 529 } 530 531 PROPERTY_PARSERS = { 532 "AFTER": lambda self: self._parse_afterjournal( 533 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 534 ), 535 "ALGORITHM": lambda self: 
self._parse_property_assignment(exp.AlgorithmProperty), 536 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 537 "BEFORE": lambda self: self._parse_journal( 538 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 539 ), 540 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 541 "CHARACTER SET": lambda self: self._parse_character_set(), 542 "CHECKSUM": lambda self: self._parse_checksum(), 543 "CLUSTER BY": lambda self: self.expression( 544 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 545 ), 546 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 547 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 548 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 549 default=self._prev.text.upper() == "DEFAULT" 550 ), 551 "DEFINER": lambda self: self._parse_definer(), 552 "DETERMINISTIC": lambda self: self.expression( 553 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 554 ), 555 "DISTKEY": lambda self: self._parse_distkey(), 556 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 557 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 558 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 559 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 560 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 561 "FREESPACE": lambda self: self._parse_freespace(), 562 "GLOBAL": lambda self: self._parse_temporary(global_=True), 563 "IMMUTABLE": lambda self: self.expression( 564 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 565 ), 566 "JOURNAL": lambda self: self._parse_journal( 567 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 568 ), 569 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 570 "LIKE": lambda self: 
self._parse_create_like(), 571 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 572 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 573 "LOCK": lambda self: self._parse_locking(), 574 "LOCKING": lambda self: self._parse_locking(), 575 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 576 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 577 "MAX": lambda self: self._parse_datablocksize(), 578 "MAXIMUM": lambda self: self._parse_datablocksize(), 579 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 580 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 581 ), 582 "MIN": lambda self: self._parse_datablocksize(), 583 "MINIMUM": lambda self: self._parse_datablocksize(), 584 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 585 "NO": lambda self: self._parse_noprimaryindex(), 586 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 587 "ON": lambda self: self._parse_oncommit(), 588 "PARTITION BY": lambda self: self._parse_partitioned_by(), 589 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 590 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 591 "RETURNS": lambda self: self._parse_returns(), 592 "ROW": lambda self: self._parse_row(), 593 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 594 "SORTKEY": lambda self: self._parse_sortkey(), 595 "STABLE": lambda self: self.expression( 596 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 597 ), 598 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 599 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 600 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 601 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 602 "TRANSIENT": lambda self: 
self.expression(exp.TransientProperty), 603 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 604 "VOLATILE": lambda self: self.expression( 605 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 606 ), 607 "WITH": lambda self: self._parse_with_property(), 608 } 609 610 CONSTRAINT_PARSERS = { 611 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 612 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 613 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 614 "CHARACTER SET": lambda self: self.expression( 615 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 616 ), 617 "CHECK": lambda self: self.expression( 618 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 619 ), 620 "COLLATE": lambda self: self.expression( 621 exp.CollateColumnConstraint, this=self._parse_var() 622 ), 623 "COMMENT": lambda self: self.expression( 624 exp.CommentColumnConstraint, this=self._parse_string() 625 ), 626 "COMPRESS": lambda self: self._parse_compress(), 627 "DEFAULT": lambda self: self.expression( 628 exp.DefaultColumnConstraint, this=self._parse_bitwise() 629 ), 630 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 631 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 632 "FORMAT": lambda self: self.expression( 633 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 634 ), 635 "GENERATED": lambda self: self._parse_generated_as_identity(), 636 "IDENTITY": lambda self: self._parse_auto_increment(), 637 "INLINE": lambda self: self._parse_inline(), 638 "LIKE": lambda self: self._parse_create_like(), 639 "NOT": lambda self: self._parse_not_constraint(), 640 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 641 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 642 "PRIMARY KEY": lambda self: self._parse_primary_key(), 
643 "TITLE": lambda self: self.expression( 644 exp.TitleColumnConstraint, this=self._parse_var_or_string() 645 ), 646 "UNIQUE": lambda self: self._parse_unique(), 647 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 648 } 649 650 ALTER_PARSERS = { 651 "ADD": lambda self: self._parse_alter_table_add(), 652 "ALTER": lambda self: self._parse_alter_table_alter(), 653 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 654 "DROP": lambda self: self._parse_alter_table_drop(), 655 "RENAME": lambda self: self._parse_alter_table_rename(), 656 } 657 658 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 659 660 NO_PAREN_FUNCTION_PARSERS = { 661 TokenType.CASE: lambda self: self._parse_case(), 662 TokenType.IF: lambda self: self._parse_if(), 663 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 664 } 665 666 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 667 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 668 "TRY_CONVERT": lambda self: self._parse_convert(False), 669 "EXTRACT": lambda self: self._parse_extract(), 670 "POSITION": lambda self: self._parse_position(), 671 "SUBSTRING": lambda self: self._parse_substring(), 672 "TRIM": lambda self: self._parse_trim(), 673 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 674 "TRY_CAST": lambda self: self._parse_cast(False), 675 "STRING_AGG": lambda self: self._parse_string_agg(), 676 } 677 678 QUERY_MODIFIER_PARSERS = { 679 "match": lambda self: self._parse_match_recognize(), 680 "where": lambda self: self._parse_where(), 681 "group": lambda self: self._parse_group(), 682 "having": lambda self: self._parse_having(), 683 "qualify": lambda self: self._parse_qualify(), 684 "windows": lambda self: self._parse_window_clause(), 685 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 686 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 687 
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Dialect-specific SHOW/SET command parsers; empty by default and compiled
    # into tries by the _Parser metaclass.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Identifier tokens usable as window names; ROWS is excluded because it
    # introduces a window frame clause.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Passed to _parse_cast/_parse_convert by FUNCTION_PARSERS ("CAST" vs "TRY_CAST").
    STRICT_CAST = True

    # Read by the _Parser metaclass: when False, `/` maps to exp.FloatDiv.
    INTEGER_DIVISION = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): the class docstring says the default error level is
        # ErrorLevel.RAISE, but this falls back to IMMEDIATE — confirm which
        # is intended.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self) -> None:
        # Clears all per-parse state so the same instance can be reused
        # across multiple parse() calls.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we were trying so the merged
                # failure message identifies each attempt.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # Every candidate type failed; surface them all, chained to the last.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream into per-statement chunks at semicolons
        # (a trailing semicolon does not open an empty chunk), then runs
        # `parse_method` over each chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined via ANSI escape codes.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Comments captured from the previously consumed token are attached
        # once, then cleared; explicit `comments` take precedence.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL from `start`'s position through the end
        # of `end`'s text.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Converts a token's 1-based (line, col) into an absolute character
        # index into self.sql by scanning the string and counting breaks.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor `times` tokens ahead and refreshes the
        # _curr/_next/_prev/_prev_comments bookkeeping.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Repositions the cursor at an absolute token index (no-op if already there).
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback: wraps an otherwise unparsed statement as a raw Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses `COMMENT [IF EXISTS] ON <kind> <target> IS <string>`;
        # falls back to a raw command when <kind> is not a known creatable.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token,
        # fall back to command parsing, then to a bare expression/select.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        # Parses `DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE]`.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches `IF [NOT] EXISTS`; truthy only when the whole phrase matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        start = self._prev
        # REPLACE may already be the current keyword (e.g. after dispatch) or
        # arrive as the `OR REPLACE` pair.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)
        volatile = self._match(TokenType.VOLATILE)

        # `CREATE TABLE FUNCTION` — consume TABLE so FUNCTION becomes the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not
create_token: 1049 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1050 create_token = self._match_set(self.CREATABLES) and self._prev 1051 1052 if not properties or not create_token: 1053 return self._parse_as_command(start) 1054 1055 exists = self._parse_exists(not_=True) 1056 this = None 1057 expression = None 1058 indexes = None 1059 no_schema_binding = None 1060 begin = None 1061 1062 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1063 this = self._parse_user_defined_function(kind=create_token.token_type) 1064 temp_properties = self._parse_properties() 1065 if properties and temp_properties: 1066 properties.expressions.extend(temp_properties.expressions) 1067 elif temp_properties: 1068 properties = temp_properties 1069 1070 self._match(TokenType.ALIAS) 1071 begin = self._match(TokenType.BEGIN) 1072 return_ = self._match_text_seq("RETURN") 1073 expression = self._parse_statement() 1074 1075 if return_: 1076 expression = self.expression(exp.Return, this=expression) 1077 elif create_token.token_type == TokenType.INDEX: 1078 this = self._parse_index() 1079 elif create_token.token_type in self.DB_CREATABLES: 1080 table_parts = self._parse_table_parts(schema=True) 1081 1082 # exp.Properties.Location.POST_NAME 1083 if self._match(TokenType.COMMA): 1084 temp_properties = self._parse_properties(before=True) 1085 if properties and temp_properties: 1086 properties.expressions.extend(temp_properties.expressions) 1087 elif temp_properties: 1088 properties = temp_properties 1089 1090 this = self._parse_schema(this=table_parts) 1091 1092 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1093 temp_properties = self._parse_properties() 1094 if properties and temp_properties: 1095 properties.expressions.extend(temp_properties.expressions) 1096 elif temp_properties: 1097 properties = temp_properties 1098 1099 self._match(TokenType.ALIAS) 1100 1101 # exp.Properties.Location.POST_ALIAS 1102 if not ( 1103 
self._match(TokenType.SELECT, advance=False) 1104 or self._match(TokenType.WITH, advance=False) 1105 or self._match(TokenType.L_PAREN, advance=False) 1106 ): 1107 temp_properties = self._parse_properties() 1108 if properties and temp_properties: 1109 properties.expressions.extend(temp_properties.expressions) 1110 elif temp_properties: 1111 properties = temp_properties 1112 1113 expression = self._parse_ddl_select() 1114 1115 if create_token.token_type == TokenType.TABLE: 1116 # exp.Properties.Location.POST_EXPRESSION 1117 temp_properties = self._parse_properties() 1118 if properties and temp_properties: 1119 properties.expressions.extend(temp_properties.expressions) 1120 elif temp_properties: 1121 properties = temp_properties 1122 1123 indexes = [] 1124 while True: 1125 index = self._parse_create_table_index() 1126 1127 # exp.Properties.Location.POST_INDEX 1128 if self._match(TokenType.PARTITION_BY, advance=False): 1129 temp_properties = self._parse_properties() 1130 if properties and temp_properties: 1131 properties.expressions.extend(temp_properties.expressions) 1132 elif temp_properties: 1133 properties = temp_properties 1134 1135 if not index: 1136 break 1137 else: 1138 indexes.append(index) 1139 elif create_token.token_type == TokenType.VIEW: 1140 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1141 no_schema_binding = True 1142 1143 return self.expression( 1144 exp.Create, 1145 this=this, 1146 kind=create_token.text, 1147 replace=replace, 1148 unique=unique, 1149 volatile=volatile, 1150 expression=expression, 1151 exists=exists, 1152 properties=properties, 1153 indexes=indexes, 1154 no_schema_binding=no_schema_binding, 1155 begin=begin, 1156 ) 1157 1158 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1159 self._match(TokenType.COMMA) 1160 1161 # parsers look to _prev for no/dual/default, so need to consume first 1162 self._match_text_seq("NO") 1163 self._match_text_seq("DUAL") 1164 self._match_text_seq("DEFAULT") 1165 1166 if 
self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            # NOTE(review): self._curr is dereferenced without a None guard here —
            # confirm callers never reach this point at end of input.
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single DDL property, returning None when nothing matches."""
        # Keyword-driven properties, dispatched through PROPERTY_PARSERS.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment; the key may be an identifier or a string.
        # advance=False peeks without consuming so the key can be re-parsed below.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Property of the form `NAME [= | AS] <value>`; both separators are optional.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Collect consecutive properties into one exp.Properties node (or None if empty)."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A property parser may return a single node or a collection of nodes.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        # [NO] FALLBACK [PROTECTION] — the `no` flag is supplied by the caller.
        self._match_text_seq("FALLBACK")
        return
self.expression( 1225 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1226 ) 1227 1228 def _parse_with_property( 1229 self, 1230 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1231 self._match(TokenType.WITH) 1232 if self._match(TokenType.L_PAREN, advance=False): 1233 return self._parse_wrapped_csv(self._parse_property) 1234 1235 if self._match_text_seq("JOURNAL"): 1236 return self._parse_withjournaltable() 1237 1238 if self._match_text_seq("DATA"): 1239 return self._parse_withdata(no=False) 1240 elif self._match_text_seq("NO", "DATA"): 1241 return self._parse_withdata(no=True) 1242 1243 if not self._next: 1244 return None 1245 1246 return self._parse_withisolatedloading() 1247 1248 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1249 def _parse_definer(self) -> t.Optional[exp.Expression]: 1250 self._match(TokenType.EQ) 1251 1252 user = self._parse_id_var() 1253 self._match(TokenType.PARAMETER) 1254 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1255 1256 if not user or not host: 1257 return None 1258 1259 return exp.DefinerProperty(this=f"{user}@{host}") 1260 1261 def _parse_withjournaltable(self) -> exp.Expression: 1262 self._match(TokenType.TABLE) 1263 self._match(TokenType.EQ) 1264 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1265 1266 def _parse_log(self, no=False) -> exp.Expression: 1267 self._match_text_seq("LOG") 1268 return self.expression(exp.LogProperty, no=no) 1269 1270 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1271 before = self._match_text_seq("BEFORE") 1272 self._match_text_seq("JOURNAL") 1273 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1274 1275 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1276 self._match_text_seq("NOT") 1277 self._match_text_seq("LOCAL") 1278 self._match_text_seq("AFTER", "JOURNAL") 1279 return 
self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF | DEFAULT; `on` stays None when neither ON nor OFF matched.
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        # FREESPACE = <number> [PERCENT]
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        # MERGEBLOCKRATIO [= <number> [PERCENT]]; no/default flags come from the caller.
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse [DEFAULT | MIN[IMUM] | MAX[IMUM]] DATABLOCKSIZE [= <size> [<units>]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def
_parse_blockcompression(self) -> exp.Expression: 1340 self._match_text_seq("BLOCKCOMPRESSION") 1341 self._match(TokenType.EQ) 1342 always = self._match_text_seq("ALWAYS") 1343 manual = self._match_text_seq("MANUAL") 1344 never = self._match_text_seq("NEVER") 1345 default = self._match_text_seq("DEFAULT") 1346 autotemp = None 1347 if self._match_text_seq("AUTOTEMP"): 1348 autotemp = self._parse_schema() 1349 1350 return self.expression( 1351 exp.BlockCompressionProperty, 1352 always=always, 1353 manual=manual, 1354 never=never, 1355 default=default, 1356 autotemp=autotemp, 1357 ) 1358 1359 def _parse_withisolatedloading(self) -> exp.Expression: 1360 no = self._match_text_seq("NO") 1361 concurrent = self._match_text_seq("CONCURRENT") 1362 self._match_text_seq("ISOLATED", "LOADING") 1363 for_all = self._match_text_seq("FOR", "ALL") 1364 for_insert = self._match_text_seq("FOR", "INSERT") 1365 for_none = self._match_text_seq("FOR", "NONE") 1366 return self.expression( 1367 exp.IsolatedLoadingProperty, 1368 no=no, 1369 concurrent=concurrent, 1370 for_all=for_all, 1371 for_insert=for_insert, 1372 for_none=for_none, 1373 ) 1374 1375 def _parse_locking(self) -> exp.Expression: 1376 if self._match(TokenType.TABLE): 1377 kind = "TABLE" 1378 elif self._match(TokenType.VIEW): 1379 kind = "VIEW" 1380 elif self._match(TokenType.ROW): 1381 kind = "ROW" 1382 elif self._match_text_seq("DATABASE"): 1383 kind = "DATABASE" 1384 else: 1385 kind = None 1386 1387 if kind in ("DATABASE", "TABLE", "VIEW"): 1388 this = self._parse_table_parts() 1389 else: 1390 this = None 1391 1392 if self._match(TokenType.FOR): 1393 for_or_in = "FOR" 1394 elif self._match(TokenType.IN): 1395 for_or_in = "IN" 1396 else: 1397 for_or_in = None 1398 1399 if self._match_text_seq("ACCESS"): 1400 lock_type = "ACCESS" 1401 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1402 lock_type = "EXCLUSIVE" 1403 elif self._match_text_seq("SHARE"): 1404 lock_type = "SHARE" 1405 elif self._match_text_seq("READ"): 1406 
lock_type = "READ" 1407 elif self._match_text_seq("WRITE"): 1408 lock_type = "WRITE" 1409 elif self._match_text_seq("CHECKSUM"): 1410 lock_type = "CHECKSUM" 1411 else: 1412 lock_type = None 1413 1414 override = self._match_text_seq("OVERRIDE") 1415 1416 return self.expression( 1417 exp.LockingProperty, 1418 this=this, 1419 kind=kind, 1420 for_or_in=for_or_in, 1421 lock_type=lock_type, 1422 override=override, 1423 ) 1424 1425 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1426 if self._match(TokenType.PARTITION_BY): 1427 return self._parse_csv(self._parse_conjunction) 1428 return [] 1429 1430 def _parse_partitioned_by(self) -> exp.Expression: 1431 self._match(TokenType.EQ) 1432 return self.expression( 1433 exp.PartitionedByProperty, 1434 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1435 ) 1436 1437 def _parse_withdata(self, no=False) -> exp.Expression: 1438 if self._match_text_seq("AND", "STATISTICS"): 1439 statistics = True 1440 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1441 statistics = False 1442 else: 1443 statistics = None 1444 1445 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1446 1447 def _parse_noprimaryindex(self) -> exp.Expression: 1448 self._match_text_seq("PRIMARY", "INDEX") 1449 return exp.NoPrimaryIndexProperty() 1450 1451 def _parse_oncommit(self) -> exp.Expression: 1452 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1453 return exp.OnCommitProperty() 1454 1455 def _parse_distkey(self) -> exp.Expression: 1456 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1457 1458 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1459 table = self._parse_table(schema=True) 1460 options = [] 1461 while self._match_texts(("INCLUDING", "EXCLUDING")): 1462 this = self._prev.text.upper() 1463 id_var = self._parse_id_var() 1464 1465 if not id_var: 1466 return None 1467 1468 options.append( 1469 self.expression( 1470 
exp.Property, 1471 this=this, 1472 value=exp.Var(this=id_var.this.upper()), 1473 ) 1474 ) 1475 return self.expression(exp.LikeProperty, this=table, expressions=options) 1476 1477 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1478 return self.expression( 1479 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1480 ) 1481 1482 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1483 self._match(TokenType.EQ) 1484 return self.expression( 1485 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1486 ) 1487 1488 def _parse_returns(self) -> exp.Expression: 1489 value: t.Optional[exp.Expression] 1490 is_table = self._match(TokenType.TABLE) 1491 1492 if is_table: 1493 if self._match(TokenType.LT): 1494 value = self.expression( 1495 exp.Schema, 1496 this="TABLE", 1497 expressions=self._parse_csv(self._parse_struct_kwargs), 1498 ) 1499 if not self._match(TokenType.GT): 1500 self.raise_error("Expecting >") 1501 else: 1502 value = self._parse_schema(exp.Var(this="TABLE")) 1503 else: 1504 value = self._parse_types() 1505 1506 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1507 1508 def _parse_temporary(self, global_=False) -> exp.Expression: 1509 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1510 return self.expression(exp.TemporaryProperty, global_=global_) 1511 1512 def _parse_describe(self) -> exp.Expression: 1513 kind = self._match_set(self.CREATABLES) and self._prev.text 1514 this = self._parse_table() 1515 1516 return self.expression(exp.Describe, this=this, kind=kind) 1517 1518 def _parse_insert(self) -> exp.Expression: 1519 overwrite = self._match(TokenType.OVERWRITE) 1520 local = self._match(TokenType.LOCAL) 1521 alternative = None 1522 1523 if self._match_text_seq("DIRECTORY"): 1524 this: t.Optional[exp.Expression] = self.expression( 1525 exp.Directory, 1526 this=self._parse_var_or_string(), 1527 local=local, 1528 
row_format=self._parse_row_format(match_row=True), 1529 ) 1530 else: 1531 if self._match(TokenType.OR): 1532 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1533 1534 self._match(TokenType.INTO) 1535 self._match(TokenType.TABLE) 1536 this = self._parse_table(schema=True) 1537 1538 return self.expression( 1539 exp.Insert, 1540 this=this, 1541 exists=self._parse_exists(), 1542 partition=self._parse_partition(), 1543 expression=self._parse_ddl_select(), 1544 returning=self._parse_returning(), 1545 overwrite=overwrite, 1546 alternative=alternative, 1547 ) 1548 1549 def _parse_returning(self) -> t.Optional[exp.Expression]: 1550 if not self._match(TokenType.RETURNING): 1551 return None 1552 1553 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1554 1555 def _parse_row(self) -> t.Optional[exp.Expression]: 1556 if not self._match(TokenType.FORMAT): 1557 return None 1558 return self._parse_row_format() 1559 1560 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1561 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1562 return None 1563 1564 if self._match_text_seq("SERDE"): 1565 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1566 1567 self._match_text_seq("DELIMITED") 1568 1569 kwargs = {} 1570 1571 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1572 kwargs["fields"] = self._parse_string() 1573 if self._match_text_seq("ESCAPED", "BY"): 1574 kwargs["escaped"] = self._parse_string() 1575 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1576 kwargs["collection_items"] = self._parse_string() 1577 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1578 kwargs["map_keys"] = self._parse_string() 1579 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1580 kwargs["lines"] = self._parse_string() 1581 if self._match_text_seq("NULL", "DEFINED", "AS"): 1582 kwargs["null"] = self._parse_string() 

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH <path> [OVERWRITE] INTO TABLE <table> ... (Hive)."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            # Both trailers are optional; `and` short-circuits to False when absent.
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        # DELETE FROM <table> [USING ...] [WHERE ...] [RETURNING ...]
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] [RETURNING ...]
        # `from` is a Python keyword, hence the dict-splat instead of keyword args.
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        # UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        # CACHE [LAZY] TABLE <table> [OPTIONS(...)] [AS <select>]
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
self._match_l_paren() 1645 k = self._parse_string() 1646 self._match(TokenType.EQ) 1647 v = self._parse_string() 1648 options = [k, v] 1649 self._match_r_paren() 1650 1651 self._match(TokenType.ALIAS) 1652 return self.expression( 1653 exp.Cache, 1654 this=table, 1655 lazy=lazy, 1656 options=options, 1657 expression=self._parse_select(nested=True), 1658 ) 1659 1660 def _parse_partition(self) -> t.Optional[exp.Expression]: 1661 if not self._match(TokenType.PARTITION): 1662 return None 1663 1664 return self.expression( 1665 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1666 ) 1667 1668 def _parse_value(self) -> exp.Expression: 1669 if self._match(TokenType.L_PAREN): 1670 expressions = self._parse_csv(self._parse_conjunction) 1671 self._match_r_paren() 1672 return self.expression(exp.Tuple, expressions=expressions) 1673 1674 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 1675 # Source: https://prestodb.io/docs/current/sql/values.html 1676 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1677 1678 def _parse_select( 1679 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1680 ) -> t.Optional[exp.Expression]: 1681 cte = self._parse_with() 1682 if cte: 1683 this = self._parse_statement() 1684 1685 if not this: 1686 self.raise_error("Failed to parse any statement following CTE") 1687 return cte 1688 1689 if "with" in this.arg_types: 1690 this.set("with", cte) 1691 else: 1692 self.raise_error(f"{this.key} does not support CTE") 1693 this = cte 1694 elif self._match(TokenType.SELECT): 1695 comments = self._prev_comments 1696 1697 hint = self._parse_hint() 1698 all_ = self._match(TokenType.ALL) 1699 distinct = self._match(TokenType.DISTINCT) 1700 1701 if distinct: 1702 distinct = self.expression( 1703 exp.Distinct, 1704 on=self._parse_value() if self._match(TokenType.ON) else None, 1705 ) 1706 1707 if all_ and distinct: 1708 self.raise_error("Cannot specify both 
ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            # Re-attach the comments captured right after the SELECT keyword.
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH (common table expression) clause into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a stray WITH between them is
            # tolerated as a separator too.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        # <alias> [(<columns>)] AS (<statement>)
        alias = self._parse_table_alias()
        if not alias or not alias.this:
self.raise_error("Expected CTE to have alias") 1773 1774 self._match(TokenType.ALIAS) 1775 1776 return self.expression( 1777 exp.CTE, 1778 this=self._parse_wrapped(self._parse_statement), 1779 alias=alias, 1780 ) 1781 1782 def _parse_table_alias( 1783 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1784 ) -> t.Optional[exp.Expression]: 1785 any_token = self._match(TokenType.ALIAS) 1786 alias = self._parse_id_var( 1787 any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 1788 ) 1789 index = self._index 1790 1791 if self._match(TokenType.L_PAREN): 1792 columns = self._parse_csv(self._parse_function_parameter) 1793 self._match_r_paren() if columns else self._retreat(index) 1794 else: 1795 columns = None 1796 1797 if not alias and not columns: 1798 return None 1799 1800 return self.expression(exp.TableAlias, this=alias, columns=columns) 1801 1802 def _parse_subquery( 1803 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1804 ) -> exp.Expression: 1805 return self.expression( 1806 exp.Subquery, 1807 this=this, 1808 pivots=self._parse_pivots(), 1809 alias=self._parse_table_alias() if parse_alias else None, 1810 ) 1811 1812 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1813 if not isinstance(this, self.MODIFIABLES): 1814 return 1815 1816 table = isinstance(this, exp.Table) 1817 1818 while True: 1819 lateral = self._parse_lateral() 1820 join = self._parse_join() 1821 comma = None if table else self._match(TokenType.COMMA) 1822 if lateral: 1823 this.append("laterals", lateral) 1824 if join: 1825 this.append("joins", join) 1826 if comma: 1827 this.args["from"].append("expressions", self._parse_table()) 1828 if not (lateral or join or comma): 1829 break 1830 1831 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1832 expression = parser(self) 1833 1834 if expression: 1835 this.set(key, expression) 1836 1837 def _parse_hint(self) -> t.Optional[exp.Expression]: 1838 if self._match(TokenType.HINT): 
1839 hints = self._parse_csv(self._parse_function) 1840 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1841 self.raise_error("Expected */ after HINT") 1842 return self.expression(exp.Hint, expressions=hints) 1843 1844 return None 1845 1846 def _parse_into(self) -> t.Optional[exp.Expression]: 1847 if not self._match(TokenType.INTO): 1848 return None 1849 1850 temp = self._match(TokenType.TEMPORARY) 1851 unlogged = self._match(TokenType.UNLOGGED) 1852 self._match(TokenType.TABLE) 1853 1854 return self.expression( 1855 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1856 ) 1857 1858 def _parse_from(self) -> t.Optional[exp.Expression]: 1859 if not self._match(TokenType.FROM): 1860 return None 1861 1862 return self.expression( 1863 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1864 ) 1865 1866 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1867 if not self._match(TokenType.MATCH_RECOGNIZE): 1868 return None 1869 self._match_l_paren() 1870 1871 partition = self._parse_partition_by() 1872 order = self._parse_order() 1873 measures = ( 1874 self._parse_alias(self._parse_conjunction()) 1875 if self._match_text_seq("MEASURES") 1876 else None 1877 ) 1878 1879 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1880 rows = exp.Var(this="ONE ROW PER MATCH") 1881 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1882 text = "ALL ROWS PER MATCH" 1883 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1884 text += f" SHOW EMPTY MATCHES" 1885 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1886 text += f" OMIT EMPTY MATCHES" 1887 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1888 text += f" WITH UNMATCHED ROWS" 1889 rows = exp.Var(this=text) 1890 else: 1891 rows = None 1892 1893 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1894 text = "AFTER MATCH SKIP" 1895 if self._match_text_seq("PAST", "LAST", "ROW"): 1896 text += f" PAST LAST ROW" 1897 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 1898 text += f" TO NEXT ROW" 1899 elif self._match_text_seq("TO", "FIRST"): 1900 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1901 elif self._match_text_seq("TO", "LAST"): 1902 text += f" TO LAST {self._advance_any().text}" # type: ignore 1903 after = exp.Var(this=text) 1904 else: 1905 after = None 1906 1907 if self._match_text_seq("PATTERN"): 1908 self._match_l_paren() 1909 1910 if not self._curr: 1911 self.raise_error("Expecting )", self._curr) 1912 1913 paren = 1 1914 start = self._curr 1915 1916 while self._curr and paren > 0: 1917 if self._curr.token_type == TokenType.L_PAREN: 1918 paren += 1 1919 if self._curr.token_type == TokenType.R_PAREN: 1920 paren -= 1 1921 end = self._prev 1922 self._advance() 1923 if paren > 0: 1924 self.raise_error("Expecting )", self._curr) 1925 pattern = exp.Var(this=self._find_sql(start, end)) 1926 else: 1927 pattern = None 1928 1929 define = ( 1930 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1931 ) 1932 self._match_r_paren() 1933 1934 return self.expression( 1935 exp.MatchRecognize, 1936 partition_by=partition, 1937 order=order, 1938 measures=measures, 1939 rows=rows, 1940 after=after, 1941 pattern=pattern, 1942 define=define, 1943 ) 1944 1945 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1946 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1947 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1948 1949 if outer_apply or cross_apply: 1950 this = self._parse_select(table=True) 1951 view = None 1952 outer = not cross_apply 1953 elif self._match(TokenType.LATERAL): 1954 this = self._parse_select(table=True) 1955 view = self._match(TokenType.VIEW) 1956 outer = self._match(TokenType.OUTER) 1957 else: 1958 return None 1959 1960 if not this: 1961 this = self._parse_function() or self._parse_id_var(any_token=False) 1962 while self._match(TokenType.DOT): 1963 this = exp.Dot( 1964 this=this, 
1965 expression=self._parse_function() or self._parse_id_var(any_token=False), 1966 ) 1967 1968 table_alias: t.Optional[exp.Expression] 1969 1970 if view: 1971 table = self._parse_id_var(any_token=False) 1972 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1973 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1974 else: 1975 table_alias = self._parse_table_alias() 1976 1977 expression = self.expression( 1978 exp.Lateral, 1979 this=this, 1980 view=view, 1981 outer=outer, 1982 alias=table_alias, 1983 ) 1984 1985 if outer_apply or cross_apply: 1986 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1987 1988 return expression 1989 1990 def _parse_join_side_and_kind( 1991 self, 1992 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1993 return ( 1994 self._match(TokenType.NATURAL) and self._prev, 1995 self._match_set(self.JOIN_SIDES) and self._prev, 1996 self._match_set(self.JOIN_KINDS) and self._prev, 1997 ) 1998 1999 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2000 natural, side, kind = self._parse_join_side_and_kind() 2001 2002 if not skip_join_token and not self._match(TokenType.JOIN): 2003 return None 2004 2005 kwargs: t.Dict[ 2006 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2007 ] = {"this": self._parse_table()} 2008 2009 if natural: 2010 kwargs["natural"] = True 2011 if side: 2012 kwargs["side"] = side.text 2013 if kind: 2014 kwargs["kind"] = kind.text 2015 2016 if self._match(TokenType.ON): 2017 kwargs["on"] = self._parse_conjunction() 2018 elif self._match(TokenType.USING): 2019 kwargs["using"] = self._parse_wrapped_id_vars() 2020 2021 return self.expression(exp.Join, **kwargs) # type: ignore 2022 2023 def _parse_index(self) -> exp.Expression: 2024 index = self._parse_id_var() 2025 self._match(TokenType.ON) 2026 self._match(TokenType.TABLE) # hive 2027 2028 return 
self.expression( 2029 exp.Index, 2030 this=index, 2031 table=self.expression(exp.Table, this=self._parse_id_var()), 2032 columns=self._parse_expression(), 2033 ) 2034 2035 def _parse_create_table_index(self) -> t.Optional[exp.Expression]: 2036 unique = self._match(TokenType.UNIQUE) 2037 primary = self._match_text_seq("PRIMARY") 2038 amp = self._match_text_seq("AMP") 2039 if not self._match(TokenType.INDEX): 2040 return None 2041 index = self._parse_id_var() 2042 columns = None 2043 if self._match(TokenType.L_PAREN, advance=False): 2044 columns = self._parse_wrapped_csv(self._parse_column) 2045 return self.expression( 2046 exp.Index, 2047 this=index, 2048 columns=columns, 2049 unique=unique, 2050 primary=primary, 2051 amp=amp, 2052 ) 2053 2054 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2055 catalog = None 2056 db = None 2057 table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False) 2058 2059 while self._match(TokenType.DOT): 2060 if catalog: 2061 # This allows nesting the table in arbitrarily many dot expressions if needed 2062 table = self.expression(exp.Dot, this=table, expression=self._parse_id_var()) 2063 else: 2064 catalog = db 2065 db = table 2066 table = self._parse_id_var() 2067 2068 if not table: 2069 self.raise_error(f"Expected table name but got {self._curr}") 2070 2071 return self.expression( 2072 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2073 ) 2074 2075 def _parse_table( 2076 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2077 ) -> t.Optional[exp.Expression]: 2078 lateral = self._parse_lateral() 2079 2080 if lateral: 2081 return lateral 2082 2083 unnest = self._parse_unnest() 2084 2085 if unnest: 2086 return unnest 2087 2088 values = self._parse_derived_table_values() 2089 2090 if values: 2091 return values 2092 2093 subquery = self._parse_select(table=True) 2094 2095 if subquery: 2096 return subquery 2097 2098 this = 
self._parse_table_parts(schema=schema) 2099 2100 if schema: 2101 return self._parse_schema(this=this) 2102 2103 if self.alias_post_tablesample: 2104 table_sample = self._parse_table_sample() 2105 2106 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2107 2108 if alias: 2109 this.set("alias", alias) 2110 2111 if not this.args.get("pivots"): 2112 this.set("pivots", self._parse_pivots()) 2113 2114 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2115 this.set( 2116 "hints", 2117 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2118 ) 2119 self._match_r_paren() 2120 2121 if not self.alias_post_tablesample: 2122 table_sample = self._parse_table_sample() 2123 2124 if table_sample: 2125 table_sample.set("this", this) 2126 this = table_sample 2127 2128 return this 2129 2130 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2131 if not self._match(TokenType.UNNEST): 2132 return None 2133 2134 expressions = self._parse_wrapped_csv(self._parse_column) 2135 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2136 alias = self._parse_table_alias() 2137 2138 if alias and self.unnest_column_only: 2139 if alias.args.get("columns"): 2140 self.raise_error("Unexpected extra column alias in unnest.") 2141 alias.set("columns", [alias.this]) 2142 alias.set("this", None) 2143 2144 offset = None 2145 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2146 self._match(TokenType.ALIAS) 2147 offset = self._parse_conjunction() 2148 2149 return self.expression( 2150 exp.Unnest, 2151 expressions=expressions, 2152 ordinality=ordinality, 2153 alias=alias, 2154 offset=offset, 2155 ) 2156 2157 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2158 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2159 if not is_derived and not self._match(TokenType.VALUES): 2160 return None 2161 2162 expressions = self._parse_csv(self._parse_value) 2163 2164 if 
is_derived: 2165 self._match_r_paren() 2166 2167 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2168 2169 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2170 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2171 as_modifier and self._match_text_seq("USING", "SAMPLE") 2172 ): 2173 return None 2174 2175 bucket_numerator = None 2176 bucket_denominator = None 2177 bucket_field = None 2178 percent = None 2179 rows = None 2180 size = None 2181 seed = None 2182 2183 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2184 method = self._parse_var(tokens=(TokenType.ROW,)) 2185 2186 self._match(TokenType.L_PAREN) 2187 2188 num = self._parse_number() 2189 2190 if self._match(TokenType.BUCKET): 2191 bucket_numerator = self._parse_number() 2192 self._match(TokenType.OUT_OF) 2193 bucket_denominator = bucket_denominator = self._parse_number() 2194 self._match(TokenType.ON) 2195 bucket_field = self._parse_field() 2196 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2197 percent = num 2198 elif self._match(TokenType.ROWS): 2199 rows = num 2200 else: 2201 size = num 2202 2203 self._match(TokenType.R_PAREN) 2204 2205 if self._match(TokenType.L_PAREN): 2206 method = self._parse_var() 2207 seed = self._match(TokenType.COMMA) and self._parse_number() 2208 self._match_r_paren() 2209 elif self._match_texts(("SEED", "REPEATABLE")): 2210 seed = self._parse_wrapped(self._parse_number) 2211 2212 return self.expression( 2213 exp.TableSample, 2214 method=method, 2215 bucket_numerator=bucket_numerator, 2216 bucket_denominator=bucket_denominator, 2217 bucket_field=bucket_field, 2218 percent=percent, 2219 rows=rows, 2220 size=size, 2221 seed=seed, 2222 kind=kind, 2223 ) 2224 2225 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2226 return list(iter(self._parse_pivot, None)) 2227 2228 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2229 
index = self._index 2230 2231 if self._match(TokenType.PIVOT): 2232 unpivot = False 2233 elif self._match(TokenType.UNPIVOT): 2234 unpivot = True 2235 else: 2236 return None 2237 2238 expressions = [] 2239 field = None 2240 2241 if not self._match(TokenType.L_PAREN): 2242 self._retreat(index) 2243 return None 2244 2245 if unpivot: 2246 expressions = self._parse_csv(self._parse_column) 2247 else: 2248 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2249 2250 if not self._match(TokenType.FOR): 2251 self.raise_error("Expecting FOR") 2252 2253 value = self._parse_column() 2254 2255 if not self._match(TokenType.IN): 2256 self.raise_error("Expecting IN") 2257 2258 field = self._parse_in(value) 2259 2260 self._match_r_paren() 2261 2262 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2263 2264 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2265 pivot.set("alias", self._parse_table_alias()) 2266 2267 return pivot 2268 2269 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2270 if not skip_where_token and not self._match(TokenType.WHERE): 2271 return None 2272 2273 return self.expression( 2274 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2275 ) 2276 2277 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2278 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2279 return None 2280 2281 elements = defaultdict(list) 2282 2283 while True: 2284 expressions = self._parse_csv(self._parse_conjunction) 2285 if expressions: 2286 elements["expressions"].extend(expressions) 2287 2288 grouping_sets = self._parse_grouping_sets() 2289 if grouping_sets: 2290 elements["grouping_sets"].extend(grouping_sets) 2291 2292 rollup = None 2293 cube = None 2294 2295 with_ = self._match(TokenType.WITH) 2296 if self._match(TokenType.ROLLUP): 2297 rollup = with_ or 
self._parse_wrapped_csv(self._parse_column) 2298 elements["rollup"].extend(ensure_list(rollup)) 2299 2300 if self._match(TokenType.CUBE): 2301 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2302 elements["cube"].extend(ensure_list(cube)) 2303 2304 if not (expressions or grouping_sets or rollup or cube): 2305 break 2306 2307 return self.expression(exp.Group, **elements) # type: ignore 2308 2309 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2310 if not self._match(TokenType.GROUPING_SETS): 2311 return None 2312 2313 return self._parse_wrapped_csv(self._parse_grouping_set) 2314 2315 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2316 if self._match(TokenType.L_PAREN): 2317 grouping_set = self._parse_csv(self._parse_column) 2318 self._match_r_paren() 2319 return self.expression(exp.Tuple, expressions=grouping_set) 2320 2321 return self._parse_column() 2322 2323 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2324 if not skip_having_token and not self._match(TokenType.HAVING): 2325 return None 2326 return self.expression(exp.Having, this=self._parse_conjunction()) 2327 2328 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2329 if not self._match(TokenType.QUALIFY): 2330 return None 2331 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2332 2333 def _parse_order( 2334 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2335 ) -> t.Optional[exp.Expression]: 2336 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2337 return this 2338 2339 return self.expression( 2340 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2341 ) 2342 2343 def _parse_sort( 2344 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2345 ) -> t.Optional[exp.Expression]: 2346 if not self._match(token_type): 2347 return None 2348 return self.expression(exp_class, 
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item: expression [ASC|DESC] [NULLS FIRST|LAST].

        When the query does not spell out a null ordering, an explicit
        ``nulls_first`` flag is derived from the dialect's ``null_ordering``
        setting so the result is deterministic across dialects.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default direction; consume silently
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer NULLS FIRST when nulls sort "small" and the order is ascending
        # (or "large" and descending), unless the dialect always puts nulls last.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
self.expression(exp.Offset, this=this, expression=count) 2403 2404 def _parse_lock(self) -> t.Optional[exp.Expression]: 2405 if self._match_text_seq("FOR", "UPDATE"): 2406 return self.expression(exp.Lock, update=True) 2407 if self._match_text_seq("FOR", "SHARE"): 2408 return self.expression(exp.Lock, update=False) 2409 2410 return None 2411 2412 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2413 if not self._match_set(self.SET_OPERATIONS): 2414 return this 2415 2416 token_type = self._prev.token_type 2417 2418 if token_type == TokenType.UNION: 2419 expression = exp.Union 2420 elif token_type == TokenType.EXCEPT: 2421 expression = exp.Except 2422 else: 2423 expression = exp.Intersect 2424 2425 return self.expression( 2426 expression, 2427 this=this, 2428 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2429 expression=self._parse_set_operations(self._parse_select(nested=True)), 2430 ) 2431 2432 def _parse_expression(self) -> t.Optional[exp.Expression]: 2433 return self._parse_alias(self._parse_conjunction()) 2434 2435 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2436 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2437 2438 def _parse_equality(self) -> t.Optional[exp.Expression]: 2439 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2440 2441 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2442 return self._parse_tokens(self._parse_range, self.COMPARISON) 2443 2444 def _parse_range(self) -> t.Optional[exp.Expression]: 2445 this = self._parse_bitwise() 2446 negate = self._match(TokenType.NOT) 2447 2448 if self._match_set(self.RANGE_PARSERS): 2449 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2450 elif self._match(TokenType.ISNULL): 2451 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2452 2453 # Postgres supports ISNULL and NOTNULL for conditions. 
2454 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2455 if self._match(TokenType.NOTNULL): 2456 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2457 this = self.expression(exp.Not, this=this) 2458 2459 if negate: 2460 this = self.expression(exp.Not, this=this) 2461 2462 if self._match(TokenType.IS): 2463 this = self._parse_is(this) 2464 2465 return this 2466 2467 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2468 negate = self._match(TokenType.NOT) 2469 if self._match(TokenType.DISTINCT_FROM): 2470 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2471 return self.expression(klass, this=this, expression=self._parse_expression()) 2472 2473 this = self.expression( 2474 exp.Is, 2475 this=this, 2476 expression=self._parse_null() or self._parse_boolean(), 2477 ) 2478 return self.expression(exp.Not, this=this) if negate else this 2479 2480 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2481 unnest = self._parse_unnest() 2482 if unnest: 2483 this = self.expression(exp.In, this=this, unnest=unnest) 2484 elif self._match(TokenType.L_PAREN): 2485 expressions = self._parse_csv(self._parse_select_or_expression) 2486 2487 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2488 this = self.expression(exp.In, this=this, query=expressions[0]) 2489 else: 2490 this = self.expression(exp.In, this=this, expressions=expressions) 2491 2492 self._match_r_paren() 2493 else: 2494 this = self.expression(exp.In, this=this, field=self._parse_field()) 2495 2496 return this 2497 2498 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2499 low = self._parse_bitwise() 2500 self._match(TokenType.AND) 2501 high = self._parse_bitwise() 2502 return self.expression(exp.Between, this=this, low=low, high=high) 2503 2504 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2505 if not self._match(TokenType.ESCAPE): 2506 return this 2507 
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2508 2509 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2510 this = self._parse_term() 2511 2512 while True: 2513 if self._match_set(self.BITWISE): 2514 this = self.expression( 2515 self.BITWISE[self._prev.token_type], 2516 this=this, 2517 expression=self._parse_term(), 2518 ) 2519 elif self._match_pair(TokenType.LT, TokenType.LT): 2520 this = self.expression( 2521 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2522 ) 2523 elif self._match_pair(TokenType.GT, TokenType.GT): 2524 this = self.expression( 2525 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2526 ) 2527 else: 2528 break 2529 2530 return this 2531 2532 def _parse_term(self) -> t.Optional[exp.Expression]: 2533 return self._parse_tokens(self._parse_factor, self.TERM) 2534 2535 def _parse_factor(self) -> t.Optional[exp.Expression]: 2536 return self._parse_tokens(self._parse_unary, self.FACTOR) 2537 2538 def _parse_unary(self) -> t.Optional[exp.Expression]: 2539 if self._match_set(self.UNARY_PARSERS): 2540 return self.UNARY_PARSERS[self._prev.token_type](self) 2541 return self._parse_at_time_zone(self._parse_type()) 2542 2543 def _parse_type(self) -> t.Optional[exp.Expression]: 2544 if self._match(TokenType.INTERVAL): 2545 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_var()) 2546 2547 index = self._index 2548 type_token = self._parse_types(check_func=True) 2549 this = self._parse_column() 2550 2551 if type_token: 2552 if this and not isinstance(this, exp.Star): 2553 return self.expression(exp.Cast, this=this, to=type_token) 2554 if not type_token.args.get("expressions"): 2555 self._retreat(index) 2556 return self._parse_column() 2557 return type_token 2558 2559 return this 2560 2561 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2562 index = self._index 2563 2564 prefix = self._match_text_seq("SYSUDTLIB", ".") 2565 2566 if 
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested or parameterized) data type into exp.DataType.

        Args:
            check_func: when True, be conservative about type-vs-function
                ambiguity — a parenthesized argument list that is not followed
                by a string literal is rejected so the caller can re-parse it
                as a function call.

        Returns:
            The parsed type expression, or None when no type is present
            (the token position is restored in that case).
        """
        index = self._index

        # Teradata allows a SYSUDTLIB. prefix on type names.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            # Parenthesized type parameters, e.g. DECIMAL(10, 2) or STRUCT(...).
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: this wasn't a parameterized type after all.
                self._retreat(index)
                return None

            self._match_r_paren()
            # Parens may also mean this is a function call — checked below.
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Bracket array syntax, e.g. INT[] / INT[][] — each [] wraps in ARRAY.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` (no matching `]` pair) means a subscript, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nesting, e.g. ARRAY<INT> or STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto dedicated types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone qualifier rules out the function reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Disambiguate TYPE(args): unless a string literal follows, treat
            # it as a function call — rewind fully and let the caller parse it.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, folding dots, brackets, casts and
        column operators onto the base field."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            # Promote a bare identifier to a Column node.
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Operator with a dedicated handler: take the next token
                # verbatim as a number or string literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Fold another dotted part by shifting table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                # Catalog slot already full: fall back to generic Dot nesting.
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including no-paren function keywords.

        Args:
            functions: optional name -> builder mapping that overrides
                `self.FUNCTIONS`.

        Returns:
            The parsed function expression (possibly window-wrapped), or None
            when the upcoming tokens do not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Keyword-style constructs with their own parsers (no parentheses).
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows: only bare keywords like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            # Functions with bespoke argument grammars (e.g. special syntax).
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...).
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown name: keep it as an Anonymous function node.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
index = self._index 2895 2896 if self._match(TokenType.L_PAREN): 2897 expressions = self._parse_csv(self._parse_id_var) 2898 2899 if not self._match(TokenType.R_PAREN): 2900 self._retreat(index) 2901 else: 2902 expressions = [self._parse_id_var()] 2903 2904 if self._match_set(self.LAMBDAS): 2905 return self.LAMBDAS[self._prev.token_type](self, expressions) 2906 2907 self._retreat(index) 2908 2909 this: t.Optional[exp.Expression] 2910 2911 if self._match(TokenType.DISTINCT): 2912 this = self.expression( 2913 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2914 ) 2915 else: 2916 this = self._parse_select_or_expression() 2917 2918 if self._match(TokenType.IGNORE_NULLS): 2919 this = self.expression(exp.IgnoreNulls, this=this) 2920 else: 2921 self._match(TokenType.RESPECT_NULLS) 2922 2923 return self._parse_limit(self._parse_order(this)) 2924 2925 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2926 index = self._index 2927 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2928 self._retreat(index) 2929 return this 2930 2931 args = self._parse_csv( 2932 lambda: self._parse_constraint() 2933 or self._parse_column_def(self._parse_field(any_token=True)) 2934 ) 2935 self._match_r_paren() 2936 return self.expression(exp.Schema, this=this, expressions=args) 2937 2938 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2939 kind = self._parse_types() 2940 2941 if self._match_text_seq("FOR", "ORDINALITY"): 2942 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2943 2944 constraints = [] 2945 while True: 2946 constraint = self._parse_column_constraint() 2947 if not constraint: 2948 break 2949 constraints.append(constraint) 2950 2951 if not kind and not constraints: 2952 return this 2953 2954 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2955 2956 def _parse_auto_increment(self) -> exp.Expression: 
2957 start = None 2958 increment = None 2959 2960 if self._match(TokenType.L_PAREN, advance=False): 2961 args = self._parse_wrapped_csv(self._parse_bitwise) 2962 start = seq_get(args, 0) 2963 increment = seq_get(args, 1) 2964 elif self._match_text_seq("START"): 2965 start = self._parse_bitwise() 2966 self._match_text_seq("INCREMENT") 2967 increment = self._parse_bitwise() 2968 2969 if start and increment: 2970 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2971 2972 return exp.AutoIncrementColumnConstraint() 2973 2974 def _parse_compress(self) -> exp.Expression: 2975 if self._match(TokenType.L_PAREN, advance=False): 2976 return self.expression( 2977 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 2978 ) 2979 2980 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 2981 2982 def _parse_generated_as_identity(self) -> exp.Expression: 2983 if self._match(TokenType.BY_DEFAULT): 2984 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2985 else: 2986 self._match_text_seq("ALWAYS") 2987 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2988 2989 self._match_text_seq("AS", "IDENTITY") 2990 if self._match(TokenType.L_PAREN): 2991 if self._match_text_seq("START", "WITH"): 2992 this.set("start", self._parse_bitwise()) 2993 if self._match_text_seq("INCREMENT", "BY"): 2994 this.set("increment", self._parse_bitwise()) 2995 if self._match_text_seq("MINVALUE"): 2996 this.set("minvalue", self._parse_bitwise()) 2997 if self._match_text_seq("MAXVALUE"): 2998 this.set("maxvalue", self._parse_bitwise()) 2999 3000 if self._match_text_seq("CYCLE"): 3001 this.set("cycle", True) 3002 elif self._match_text_seq("NO", "CYCLE"): 3003 this.set("cycle", False) 3004 3005 self._match_r_paren() 3006 3007 return this 3008 3009 def _parse_inline(self) -> t.Optional[exp.Expression]: 3010 self._match_text_seq("LENGTH") 3011 return 
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single (optionally named) column constraint, or None.

        REFERENCES is handled first; otherwise an optional ``CONSTRAINT <name>``
        prefix is consumed before dispatching on CONSTRAINT_PARSERS.
        """
        this = self._parse_references()
        if this:
            return this

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # May be just the CONSTRAINT name (or None) if no known kind followed.
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; constraints without a CONSTRAINT
        keyword are delegated to `_parse_unnamed_constraint`."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may bundle several kinds (e.g. PRIMARY KEY ... CHECK ...).
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no ``CONSTRAINT <name>`` prefix.

        `constraints` restricts which keywords are accepted; the actual parser
        is always looked up in CONSTRAINT_PARSERS.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a bare column constraint or with a
        wrapped column list (table constraint form)."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings.

        Recognized: ON <word> {NO ACTION | CASCADE | SET NULL | SET DEFAULT},
        NOT ENFORCED, DEFERRABLE, INITIALLY DEFERRED, NORELY, MATCH FULL.
        Stops at the first unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The word after ON (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(columns)] [options], or None."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]...

        The collected ON actions are stored as `delete`/`update` kwargs on the
        ForeignKey node.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE/RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint (optionally
        ASC/DESC) or as a table constraint with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array literal, subscript/slice, or a
        DuckDB ``{...}`` struct literal. Recurses to consume chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        return self._parse_bracket(this)
    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a ``:`` follows (e.g. x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then
        allow a trailing window clause."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style ``IF(cond, a, b)`` or
        statement-style ``IF cond THEN a [ELSE b] END``."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(part FROM expr) — a comma is also tolerated as the
        separator (error raised only per the configured error level)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        # Reached when a comma was found, or when raise_error only warned.
        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict=False` produces TryCast.

        A CHAR target may carry a CHARACTER SET clause, which replaces the
        type with a CharacterSet node.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT into a GroupConcat node, handling
        the Postgres trailing ORDER BY and the WITHIN GROUP form."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type);
        `strict=False` produces TryCast."""
        to: t.Optional[exp.Expression]
        this = self._parse_column()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE arguments.

        Supports ``POSITION(needle IN haystack)`` and the comma form, whose
        argument order is controlled by `haystack_first`.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING arguments, including the Postgres keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            # NOTE(review): FOR is only recognized after FROM here; a bare
            # "substring(s FOR n)" would not take this path — confirm intended.
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this
    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([{LEADING|TRAILING|BOTH}] [chars FROM] expr [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: first term is the trim characters.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one ``name AS (window spec)`` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffix of an expression: FILTER,
        WITHIN GROUP, IGNORE/RESPECT NULLS, and OVER (...).

        With `alias=True`, parses a named-window definition (``name AS (...)``)
        instead of requiring OVER.
        """
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> — reference to a named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED/CURRENT ROW or an expression,
        plus an optional PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or a wrapped alias list) after `this`.

        With `explicit=True`, only an alias introduced by AS is accepted.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like variable.

        `any_token=True` accepts any non-reserved token; otherwise only
        `tokens` (default ID_VAR_TOKENS). `prefix_tokens` are glued onto the
        front of the resulting name.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token (or any/listed tokens), falling back
        to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None
self._prev 3536 return None 3537 3538 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3539 return self._parse_var() or self._parse_string() 3540 3541 def _parse_null(self) -> t.Optional[exp.Expression]: 3542 if self._match(TokenType.NULL): 3543 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3544 return None 3545 3546 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3547 if self._match(TokenType.TRUE): 3548 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3549 if self._match(TokenType.FALSE): 3550 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3551 return None 3552 3553 def _parse_star(self) -> t.Optional[exp.Expression]: 3554 if self._match(TokenType.STAR): 3555 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3556 return None 3557 3558 def _parse_parameter(self) -> exp.Expression: 3559 wrapped = self._match(TokenType.L_BRACE) 3560 this = self._parse_var() or self._parse_primary() 3561 self._match(TokenType.R_BRACE) 3562 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3563 3564 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3565 if self._match_set(self.PLACEHOLDER_PARSERS): 3566 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3567 if placeholder: 3568 return placeholder 3569 self._advance(-1) 3570 return None 3571 3572 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3573 if not self._match(TokenType.EXCEPT): 3574 return None 3575 if self._match(TokenType.L_PAREN, advance=False): 3576 return self._parse_wrapped_csv(self._parse_column) 3577 return self._parse_csv(self._parse_column) 3578 3579 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3580 if not self._match(TokenType.REPLACE): 3581 return None 3582 if self._match(TokenType.L_PAREN, advance=False): 3583 return self._parse_wrapped_csv(self._parse_expression) 3584 return self._parse_csv(self._parse_expression) 3585 3586 def 
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`; None results
        are dropped. Comments attached to a separator are moved onto the
        preceding item."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: parse operands with
        `parse_method`, mapping each operator token via `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between required ( and ) tokens."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT if one starts here, otherwise an expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT of a DDL statement (e.g. CREATE TABLE AS),
        including trailing set operations."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # Each mode is a run of VAR tokens joined with spaces,
            # e.g. "ISOLATION LEVEL READ COMMITTED".
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
3649 while self._match(TokenType.VAR): 3650 mode.append(self._prev.text) 3651 3652 if mode: 3653 modes.append(" ".join(mode)) 3654 if not self._match(TokenType.COMMA): 3655 break 3656 3657 return self.expression(exp.Transaction, this=this, modes=modes) 3658 3659 def _parse_commit_or_rollback(self) -> exp.Expression: 3660 chain = None 3661 savepoint = None 3662 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3663 3664 self._match_texts({"TRANSACTION", "WORK"}) 3665 3666 if self._match_text_seq("TO"): 3667 self._match_text_seq("SAVEPOINT") 3668 savepoint = self._parse_id_var() 3669 3670 if self._match(TokenType.AND): 3671 chain = not self._match_text_seq("NO") 3672 self._match_text_seq("CHAIN") 3673 3674 if is_rollback: 3675 return self.expression(exp.Rollback, savepoint=savepoint) 3676 return self.expression(exp.Commit, chain=chain) 3677 3678 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3679 if not self._match_text_seq("ADD"): 3680 return None 3681 3682 self._match(TokenType.COLUMN) 3683 exists_column = self._parse_exists(not_=True) 3684 expression = self._parse_column_def(self._parse_field(any_token=True)) 3685 3686 if expression: 3687 expression.set("exists", exists_column) 3688 3689 return expression 3690 3691 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3692 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3693 3694 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3695 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3696 return self.expression( 3697 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3698 ) 3699 3700 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3701 this = None 3702 kind = self._prev.token_type 3703 3704 if kind == TokenType.CONSTRAINT: 3705 this = self._parse_id_var() 3706 3707 if self._match_text_seq("CHECK"): 3708 expression = 
    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions of ALTER TABLE ... ADD: either constraints
        (when an ADD_CONSTRAINT_TOKENS token follows) or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind so ADD itself can be re-consumed per column.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT expr |
        [SET DATA] TYPE type [COLLATE c] [USING expr]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions of ALTER TABLE ... DROP: either partitions
        (possibly with IF EXISTS) or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind so DROP itself can be re-consumed per column.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)
    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse RENAME TO <table> as a RenameTable node."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE <name> <action>; anything unrecognized falls
        back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=ensure_list(parser(self)),
            )
        return self._parse_as_command(start)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's SHOW_PARSERS trie; unknown variants
        become a generic Show node."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        """Fallback SET item parser: wrap whatever statement follows."""
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the dialect's SET_PARSERS trie, with a
        generic fallback."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source=False for BY TARGET, True for BY SOURCE, False if neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                # Unrecognized THEN action: the When node carries then=None.
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
    def _parse_set(self) -> exp.Expression:
        """Parse SET as a comma-separated list of set items."""
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and return them as a raw Command,
        split into the leading keyword and the rest of the SQL text."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming token text against a keyword trie and
        return the corresponding parser; rewinds fully on failure."""
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No prefix match — give up.
                break
            if result == 2:
                # Full keyword sequence matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
    def _match(self, token_type, advance=True):
        """Return True (consuming the token unless advance=False) when the
        current token has `token_type`; otherwise None.

        NOTE(review): the _match* helpers mix None and False for the negative
        case; both are falsy, so callers relying on truthiness are unaffected.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types, advance=True):
        """Like `_match`, but accepts any token type in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match two consecutive token types; consumes both unless
        advance=False. Returns True on match, else None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        """Require a ``(``; raise otherwise. Attaches pending comments to
        `expression` if given."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_r_paren(self, expression=None):
        """Require a ``)``; raise otherwise. Attaches pending comments to
        `expression` if given."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_texts(self, texts, advance=True):
        """Match the current token by upper-cased text against `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; rewinds completely
        when any element fails (and also when advance=False)."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains
        (used when a column reference must be treated as a path)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Unwrap Column nodes whose name is a lambda parameter, so lambda
        variables are not treated as real columns."""
        if isinstance(node, exp.Column):
            if node.name in lambda_variables:
                return node.this
        return node
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Function-name -> builder map: seeded from every known exp.Func subclass,
    # then overlaid with canonicalizing rewrites (e.g. IFNULL parses as Coalesce).
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
        "IFNULL": exp.Coalesce.from_arg_list,
    }

    # Functions callable without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Types that may carry nested type parameters, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # Every token that can begin a data type.
    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Quantified predicates that can wrap a subquery, e.g. = ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Objects that live inside a database/schema namespace.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Everything CREATE/DROP can target.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Keyword tokens that are also accepted as identifiers.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.IF,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens legal as table aliases (excludes join/clause starters).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # TRIM([LEADING|TRAILING|BOTH] ...) qualifiers.
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may appear where a function name is expected.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator precedence tables, lowest (CONJUNCTION) to highest (FACTOR).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # NOTE: the metaclass adds SLASH -> exp.FloatDiv here when INTEGER_DIVISION is False.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: x -> expr (exp.Lambda) and x => expr (keyword argument).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can follow a column expression; DOT is handled inline.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Parse-target dispatch used by parse_into: Expression type -> parser.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Statement keyword dispatch.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal token parsers; each receives the just-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/membership predicates that follow an operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: lambda self, this: self._parse_escape(
            self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
        ),
        TokenType.OVERLAPS: lambda self, this: self._parse_escape(
            self.expression(exp.Overlaps, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_bitwise())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
        ),
        TokenType.IRLIKE: lambda self, this: self.expression(
            exp.RegexpILike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_bitwise()
        ),
        TokenType.SIMILAR_TO: lambda self, this: self.expression(
            exp.SimilarTo, this=this, expression=self._parse_bitwise()
        ),
    }

    # CREATE/table property keyword dispatch (several are Teradata-style options).
    # The lambdas may inspect self._prev because the keyword was just consumed.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self:
self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword dispatch.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action dispatch.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a leading CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that take no parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions with non-standard argument syntax that need bespoke parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # SELECT-modifier clauses, keyed by the arg name set on the expression.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Populated by dialect subclasses; the metaclass builds word tries from the keys.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    # Expression types whose args can receive query modifiers.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # When True, bare CAST has strict (erroring) semantics; dialects may override.
    STRICT_CAST = True

    # When False, the metaclass remaps '/' to exp.FloatDiv (see _Parser.__new__).
    INTEGER_DIVISION = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    # (the `__init__` definition continues in the next segment)
    def
__init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): the class docstring advertises ErrorLevel.RAISE as the
        # default, but the fallback here is IMMEDIATE — confirm which is intended.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clear all mutable parsing state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we were trying to parse into.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Split the token stream on semicolons and parse each chunk separately.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # A successful parse must consume every token of the chunk.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # ANSI underline (ESC[4m ... ESC[0m) around the offending span for terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
"""
        instance = exp_class(**kwargs)
        # Pending comments from the token stream attach to the new node.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of self.sql spanned by the `start` and `end` tokens."""
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        """Translate a token's 1-based (line, col) position into an index into self.sql."""
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Move the cursor and refresh the curr/next/prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Move the cursor back (or forward) to the absolute token index `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        """Wrap an unmodeled statement keyword and its string tail in exp.Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unrecognized target kind: fall back to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: keyword dispatch, command fallback, or expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        """Parse DROP [TEMPORARY|MATERIALIZED] <kind> [IF EXISTS] <name> [CASCADE]."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present.
        return (
self._match(TokenType.IF) 1031 and (not not_ or self._match(TokenType.NOT)) 1032 and self._match(TokenType.EXISTS) 1033 ) 1034 1035 def _parse_create(self) -> t.Optional[exp.Expression]: 1036 start = self._prev 1037 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1038 TokenType.OR, TokenType.REPLACE 1039 ) 1040 unique = self._match(TokenType.UNIQUE) 1041 volatile = self._match(TokenType.VOLATILE) 1042 1043 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1044 self._match(TokenType.TABLE) 1045 1046 properties = None 1047 create_token = self._match_set(self.CREATABLES) and self._prev 1048 1049 if not create_token: 1050 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1051 create_token = self._match_set(self.CREATABLES) and self._prev 1052 1053 if not properties or not create_token: 1054 return self._parse_as_command(start) 1055 1056 exists = self._parse_exists(not_=True) 1057 this = None 1058 expression = None 1059 indexes = None 1060 no_schema_binding = None 1061 begin = None 1062 1063 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1064 this = self._parse_user_defined_function(kind=create_token.token_type) 1065 temp_properties = self._parse_properties() 1066 if properties and temp_properties: 1067 properties.expressions.extend(temp_properties.expressions) 1068 elif temp_properties: 1069 properties = temp_properties 1070 1071 self._match(TokenType.ALIAS) 1072 begin = self._match(TokenType.BEGIN) 1073 return_ = self._match_text_seq("RETURN") 1074 expression = self._parse_statement() 1075 1076 if return_: 1077 expression = self.expression(exp.Return, this=expression) 1078 elif create_token.token_type == TokenType.INDEX: 1079 this = self._parse_index() 1080 elif create_token.token_type in self.DB_CREATABLES: 1081 table_parts = self._parse_table_parts(schema=True) 1082 1083 # exp.Properties.Location.POST_NAME 1084 if self._match(TokenType.COMMA): 1085 temp_properties = 
self._parse_properties(before=True) 1086 if properties and temp_properties: 1087 properties.expressions.extend(temp_properties.expressions) 1088 elif temp_properties: 1089 properties = temp_properties 1090 1091 this = self._parse_schema(this=table_parts) 1092 1093 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1094 temp_properties = self._parse_properties() 1095 if properties and temp_properties: 1096 properties.expressions.extend(temp_properties.expressions) 1097 elif temp_properties: 1098 properties = temp_properties 1099 1100 self._match(TokenType.ALIAS) 1101 1102 # exp.Properties.Location.POST_ALIAS 1103 if not ( 1104 self._match(TokenType.SELECT, advance=False) 1105 or self._match(TokenType.WITH, advance=False) 1106 or self._match(TokenType.L_PAREN, advance=False) 1107 ): 1108 temp_properties = self._parse_properties() 1109 if properties and temp_properties: 1110 properties.expressions.extend(temp_properties.expressions) 1111 elif temp_properties: 1112 properties = temp_properties 1113 1114 expression = self._parse_ddl_select() 1115 1116 if create_token.token_type == TokenType.TABLE: 1117 # exp.Properties.Location.POST_EXPRESSION 1118 temp_properties = self._parse_properties() 1119 if properties and temp_properties: 1120 properties.expressions.extend(temp_properties.expressions) 1121 elif temp_properties: 1122 properties = temp_properties 1123 1124 indexes = [] 1125 while True: 1126 index = self._parse_create_table_index() 1127 1128 # exp.Properties.Location.POST_INDEX 1129 if self._match(TokenType.PARTITION_BY, advance=False): 1130 temp_properties = self._parse_properties() 1131 if properties and temp_properties: 1132 properties.expressions.extend(temp_properties.expressions) 1133 elif temp_properties: 1134 properties = temp_properties 1135 1136 if not index: 1137 break 1138 else: 1139 indexes.append(index) 1140 elif create_token.token_type == TokenType.VIEW: 1141 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1142 no_schema_binding = True 
1143 1144 return self.expression( 1145 exp.Create, 1146 this=this, 1147 kind=create_token.text, 1148 replace=replace, 1149 unique=unique, 1150 volatile=volatile, 1151 expression=expression, 1152 exists=exists, 1153 properties=properties, 1154 indexes=indexes, 1155 no_schema_binding=no_schema_binding, 1156 begin=begin, 1157 ) 1158 1159 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1160 self._match(TokenType.COMMA) 1161 1162 # parsers look to _prev for no/dual/default, so need to consume first 1163 self._match_text_seq("NO") 1164 self._match_text_seq("DUAL") 1165 self._match_text_seq("DEFAULT") 1166 1167 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1168 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1169 1170 return None 1171 1172 def _parse_property(self) -> t.Optional[exp.Expression]: 1173 if self._match_texts(self.PROPERTY_PARSERS): 1174 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1175 1176 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1177 return self._parse_character_set(default=True) 1178 1179 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1180 return self._parse_sortkey(compound=True) 1181 1182 if self._match_text_seq("SQL", "SECURITY"): 1183 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1184 1185 assignment = self._match_pair( 1186 TokenType.VAR, TokenType.EQ, advance=False 1187 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1188 1189 if assignment: 1190 key = self._parse_var_or_string() 1191 self._match(TokenType.EQ) 1192 return self.expression(exp.Property, this=key, value=self._parse_column()) 1193 1194 return None 1195 1196 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1197 self._match(TokenType.EQ) 1198 self._match(TokenType.ALIAS) 1199 return self.expression( 1200 exp_class, 1201 this=self._parse_var_or_string() or self._parse_number() or 
self._parse_id_var(), 1202 ) 1203 1204 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1205 properties = [] 1206 1207 while True: 1208 if before: 1209 identified_property = self._parse_property_before() 1210 else: 1211 identified_property = self._parse_property() 1212 1213 if not identified_property: 1214 break 1215 for p in ensure_collection(identified_property): 1216 properties.append(p) 1217 1218 if properties: 1219 return self.expression(exp.Properties, expressions=properties) 1220 1221 return None 1222 1223 def _parse_fallback(self, no=False) -> exp.Expression: 1224 self._match_text_seq("FALLBACK") 1225 return self.expression( 1226 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1227 ) 1228 1229 def _parse_with_property( 1230 self, 1231 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1232 self._match(TokenType.WITH) 1233 if self._match(TokenType.L_PAREN, advance=False): 1234 return self._parse_wrapped_csv(self._parse_property) 1235 1236 if self._match_text_seq("JOURNAL"): 1237 return self._parse_withjournaltable() 1238 1239 if self._match_text_seq("DATA"): 1240 return self._parse_withdata(no=False) 1241 elif self._match_text_seq("NO", "DATA"): 1242 return self._parse_withdata(no=True) 1243 1244 if not self._next: 1245 return None 1246 1247 return self._parse_withisolatedloading() 1248 1249 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1250 def _parse_definer(self) -> t.Optional[exp.Expression]: 1251 self._match(TokenType.EQ) 1252 1253 user = self._parse_id_var() 1254 self._match(TokenType.PARAMETER) 1255 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1256 1257 if not user or not host: 1258 return None 1259 1260 return exp.DefinerProperty(this=f"{user}@{host}") 1261 1262 def _parse_withjournaltable(self) -> exp.Expression: 1263 self._match(TokenType.TABLE) 1264 self._match(TokenType.EQ) 1265 return 
self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1266 1267 def _parse_log(self, no=False) -> exp.Expression: 1268 self._match_text_seq("LOG") 1269 return self.expression(exp.LogProperty, no=no) 1270 1271 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1272 before = self._match_text_seq("BEFORE") 1273 self._match_text_seq("JOURNAL") 1274 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1275 1276 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1277 self._match_text_seq("NOT") 1278 self._match_text_seq("LOCAL") 1279 self._match_text_seq("AFTER", "JOURNAL") 1280 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1281 1282 def _parse_checksum(self) -> exp.Expression: 1283 self._match_text_seq("CHECKSUM") 1284 self._match(TokenType.EQ) 1285 1286 on = None 1287 if self._match(TokenType.ON): 1288 on = True 1289 elif self._match_text_seq("OFF"): 1290 on = False 1291 default = self._match(TokenType.DEFAULT) 1292 1293 return self.expression( 1294 exp.ChecksumProperty, 1295 on=on, 1296 default=default, 1297 ) 1298 1299 def _parse_freespace(self) -> exp.Expression: 1300 self._match_text_seq("FREESPACE") 1301 self._match(TokenType.EQ) 1302 return self.expression( 1303 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1304 ) 1305 1306 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1307 self._match_text_seq("MERGEBLOCKRATIO") 1308 if self._match(TokenType.EQ): 1309 return self.expression( 1310 exp.MergeBlockRatioProperty, 1311 this=self._parse_number(), 1312 percent=self._match(TokenType.PERCENT), 1313 ) 1314 else: 1315 return self.expression( 1316 exp.MergeBlockRatioProperty, 1317 no=no, 1318 default=default, 1319 ) 1320 1321 def _parse_datablocksize(self, default=None) -> exp.Expression: 1322 if default: 1323 self._match_text_seq("DATABLOCKSIZE") 1324 return 
    def _parse_blockcompression(self) -> exp.Expression:
        """BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)] property."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        # Each _match_text_seq consumes its keyword on success, so at most one flag is True.
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE] property."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        # Mutually exclusive in practice: the first matching FOR-clause consumes the tokens.
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
self._match(TokenType.ROW): 1382 kind = "ROW" 1383 elif self._match_text_seq("DATABASE"): 1384 kind = "DATABASE" 1385 else: 1386 kind = None 1387 1388 if kind in ("DATABASE", "TABLE", "VIEW"): 1389 this = self._parse_table_parts() 1390 else: 1391 this = None 1392 1393 if self._match(TokenType.FOR): 1394 for_or_in = "FOR" 1395 elif self._match(TokenType.IN): 1396 for_or_in = "IN" 1397 else: 1398 for_or_in = None 1399 1400 if self._match_text_seq("ACCESS"): 1401 lock_type = "ACCESS" 1402 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1403 lock_type = "EXCLUSIVE" 1404 elif self._match_text_seq("SHARE"): 1405 lock_type = "SHARE" 1406 elif self._match_text_seq("READ"): 1407 lock_type = "READ" 1408 elif self._match_text_seq("WRITE"): 1409 lock_type = "WRITE" 1410 elif self._match_text_seq("CHECKSUM"): 1411 lock_type = "CHECKSUM" 1412 else: 1413 lock_type = None 1414 1415 override = self._match_text_seq("OVERRIDE") 1416 1417 return self.expression( 1418 exp.LockingProperty, 1419 this=this, 1420 kind=kind, 1421 for_or_in=for_or_in, 1422 lock_type=lock_type, 1423 override=override, 1424 ) 1425 1426 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1427 if self._match(TokenType.PARTITION_BY): 1428 return self._parse_csv(self._parse_conjunction) 1429 return [] 1430 1431 def _parse_partitioned_by(self) -> exp.Expression: 1432 self._match(TokenType.EQ) 1433 return self.expression( 1434 exp.PartitionedByProperty, 1435 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1436 ) 1437 1438 def _parse_withdata(self, no=False) -> exp.Expression: 1439 if self._match_text_seq("AND", "STATISTICS"): 1440 statistics = True 1441 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1442 statistics = False 1443 else: 1444 statistics = None 1445 1446 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1447 1448 def _parse_noprimaryindex(self) -> exp.Expression: 1449 self._match_text_seq("PRIMARY", "INDEX") 1450 return 
    def _parse_oncommit(self) -> exp.Expression:
        """ON COMMIT PRESERVE ROWS property (ON was consumed by the dispatcher)."""
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        """DISTKEY(<id>) property (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """LIKE <table> [INCLUDING|EXCLUDING <option> ...] clause of CREATE TABLE.

        Returns None when an INCLUDING/EXCLUDING keyword is not followed by an
        identifier (parse failure).
        """
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """[COMPOUND] SORTKEY(<id>, ...) property (Redshift)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """[DEFAULT] CHARACTER SET [=] <charset> property."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """RETURNS <type> or RETURNS TABLE [< kwargs >] clause of CREATE FUNCTION."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> — angle-bracketed struct-style schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
in case calling from "GLOBAL" 1511 return self.expression(exp.TemporaryProperty, global_=global_) 1512 1513 def _parse_describe(self) -> exp.Expression: 1514 kind = self._match_set(self.CREATABLES) and self._prev.text 1515 this = self._parse_table() 1516 1517 return self.expression(exp.Describe, this=this, kind=kind) 1518 1519 def _parse_insert(self) -> exp.Expression: 1520 overwrite = self._match(TokenType.OVERWRITE) 1521 local = self._match(TokenType.LOCAL) 1522 alternative = None 1523 1524 if self._match_text_seq("DIRECTORY"): 1525 this: t.Optional[exp.Expression] = self.expression( 1526 exp.Directory, 1527 this=self._parse_var_or_string(), 1528 local=local, 1529 row_format=self._parse_row_format(match_row=True), 1530 ) 1531 else: 1532 if self._match(TokenType.OR): 1533 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1534 1535 self._match(TokenType.INTO) 1536 self._match(TokenType.TABLE) 1537 this = self._parse_table(schema=True) 1538 1539 return self.expression( 1540 exp.Insert, 1541 this=this, 1542 exists=self._parse_exists(), 1543 partition=self._parse_partition(), 1544 expression=self._parse_ddl_select(), 1545 returning=self._parse_returning(), 1546 overwrite=overwrite, 1547 alternative=alternative, 1548 ) 1549 1550 def _parse_returning(self) -> t.Optional[exp.Expression]: 1551 if not self._match(TokenType.RETURNING): 1552 return None 1553 1554 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1555 1556 def _parse_row(self) -> t.Optional[exp.Expression]: 1557 if not self._match(TokenType.FORMAT): 1558 return None 1559 return self._parse_row_format() 1560 1561 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1562 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1563 return None 1564 1565 if self._match_text_seq("SERDE"): 1566 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1567 1568 
self._match_text_seq("DELIMITED") 1569 1570 kwargs = {} 1571 1572 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1573 kwargs["fields"] = self._parse_string() 1574 if self._match_text_seq("ESCAPED", "BY"): 1575 kwargs["escaped"] = self._parse_string() 1576 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1577 kwargs["collection_items"] = self._parse_string() 1578 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1579 kwargs["map_keys"] = self._parse_string() 1580 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1581 kwargs["lines"] = self._parse_string() 1582 if self._match_text_seq("NULL", "DEFINED", "AS"): 1583 kwargs["null"] = self._parse_string() 1584 1585 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1586 1587 def _parse_load_data(self) -> exp.Expression: 1588 local = self._match(TokenType.LOCAL) 1589 self._match_text_seq("INPATH") 1590 inpath = self._parse_string() 1591 overwrite = self._match(TokenType.OVERWRITE) 1592 self._match_pair(TokenType.INTO, TokenType.TABLE) 1593 1594 return self.expression( 1595 exp.LoadData, 1596 this=self._parse_table(schema=True), 1597 local=local, 1598 overwrite=overwrite, 1599 inpath=inpath, 1600 partition=self._parse_partition(), 1601 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1602 serde=self._match_text_seq("SERDE") and self._parse_string(), 1603 ) 1604 1605 def _parse_delete(self) -> exp.Expression: 1606 self._match(TokenType.FROM) 1607 1608 return self.expression( 1609 exp.Delete, 1610 this=self._parse_table(schema=True), 1611 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1612 where=self._parse_where(), 1613 returning=self._parse_returning(), 1614 ) 1615 1616 def _parse_update(self) -> exp.Expression: 1617 return self.expression( 1618 exp.Update, 1619 **{ # type: ignore 1620 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1621 "expressions": 
self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1622 "from": self._parse_from(), 1623 "where": self._parse_where(), 1624 "returning": self._parse_returning(), 1625 }, 1626 ) 1627 1628 def _parse_uncache(self) -> exp.Expression: 1629 if not self._match(TokenType.TABLE): 1630 self.raise_error("Expecting TABLE after UNCACHE") 1631 1632 return self.expression( 1633 exp.Uncache, 1634 exists=self._parse_exists(), 1635 this=self._parse_table(schema=True), 1636 ) 1637 1638 def _parse_cache(self) -> exp.Expression: 1639 lazy = self._match(TokenType.LAZY) 1640 self._match(TokenType.TABLE) 1641 table = self._parse_table(schema=True) 1642 options = [] 1643 1644 if self._match(TokenType.OPTIONS): 1645 self._match_l_paren() 1646 k = self._parse_string() 1647 self._match(TokenType.EQ) 1648 v = self._parse_string() 1649 options = [k, v] 1650 self._match_r_paren() 1651 1652 self._match(TokenType.ALIAS) 1653 return self.expression( 1654 exp.Cache, 1655 this=table, 1656 lazy=lazy, 1657 options=options, 1658 expression=self._parse_select(nested=True), 1659 ) 1660 1661 def _parse_partition(self) -> t.Optional[exp.Expression]: 1662 if not self._match(TokenType.PARTITION): 1663 return None 1664 1665 return self.expression( 1666 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1667 ) 1668 1669 def _parse_value(self) -> exp.Expression: 1670 if self._match(TokenType.L_PAREN): 1671 expressions = self._parse_csv(self._parse_conjunction) 1672 self._match_r_paren() 1673 return self.expression(exp.Tuple, expressions=expressions) 1674 1675 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statement, SELECT, a
        parenthesized nested/table subquery, or VALUES.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when error_level suppresses the raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte  # only reached when error_level suppresses the raise
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit precedes the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """WITH [RECURSIVE] <cte>, ... prefix, or None when absent."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by a comma; a stray WITH between them is tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        """One CTE: <alias> [AS] (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """[AS] <alias> [(<col>, ...)], or None when neither alias nor columns parse."""
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind if the parenthesized list turned out not to be column names.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
parse_alias: bool = True 1805 ) -> exp.Expression: 1806 return self.expression( 1807 exp.Subquery, 1808 this=this, 1809 pivots=self._parse_pivots(), 1810 alias=self._parse_table_alias() if parse_alias else None, 1811 ) 1812 1813 def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None: 1814 if not isinstance(this, self.MODIFIABLES): 1815 return 1816 1817 table = isinstance(this, exp.Table) 1818 1819 while True: 1820 lateral = self._parse_lateral() 1821 join = self._parse_join() 1822 comma = None if table else self._match(TokenType.COMMA) 1823 if lateral: 1824 this.append("laterals", lateral) 1825 if join: 1826 this.append("joins", join) 1827 if comma: 1828 this.args["from"].append("expressions", self._parse_table()) 1829 if not (lateral or join or comma): 1830 break 1831 1832 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1833 expression = parser(self) 1834 1835 if expression: 1836 this.set(key, expression) 1837 1838 def _parse_hint(self) -> t.Optional[exp.Expression]: 1839 if self._match(TokenType.HINT): 1840 hints = self._parse_csv(self._parse_function) 1841 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1842 self.raise_error("Expected */ after HINT") 1843 return self.expression(exp.Hint, expressions=hints) 1844 1845 return None 1846 1847 def _parse_into(self) -> t.Optional[exp.Expression]: 1848 if not self._match(TokenType.INTO): 1849 return None 1850 1851 temp = self._match(TokenType.TEMPORARY) 1852 unlogged = self._match(TokenType.UNLOGGED) 1853 self._match(TokenType.TABLE) 1854 1855 return self.expression( 1856 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1857 ) 1858 1859 def _parse_from(self) -> t.Optional[exp.Expression]: 1860 if not self._match(TokenType.FROM): 1861 return None 1862 1863 return self.expression( 1864 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1865 ) 1866 1867 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 
1868 if not self._match(TokenType.MATCH_RECOGNIZE): 1869 return None 1870 self._match_l_paren() 1871 1872 partition = self._parse_partition_by() 1873 order = self._parse_order() 1874 measures = ( 1875 self._parse_alias(self._parse_conjunction()) 1876 if self._match_text_seq("MEASURES") 1877 else None 1878 ) 1879 1880 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1881 rows = exp.Var(this="ONE ROW PER MATCH") 1882 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1883 text = "ALL ROWS PER MATCH" 1884 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1885 text += f" SHOW EMPTY MATCHES" 1886 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1887 text += f" OMIT EMPTY MATCHES" 1888 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1889 text += f" WITH UNMATCHED ROWS" 1890 rows = exp.Var(this=text) 1891 else: 1892 rows = None 1893 1894 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1895 text = "AFTER MATCH SKIP" 1896 if self._match_text_seq("PAST", "LAST", "ROW"): 1897 text += f" PAST LAST ROW" 1898 elif self._match_text_seq("TO", "NEXT", "ROW"): 1899 text += f" TO NEXT ROW" 1900 elif self._match_text_seq("TO", "FIRST"): 1901 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1902 elif self._match_text_seq("TO", "LAST"): 1903 text += f" TO LAST {self._advance_any().text}" # type: ignore 1904 after = exp.Var(this=text) 1905 else: 1906 after = None 1907 1908 if self._match_text_seq("PATTERN"): 1909 self._match_l_paren() 1910 1911 if not self._curr: 1912 self.raise_error("Expecting )", self._curr) 1913 1914 paren = 1 1915 start = self._curr 1916 1917 while self._curr and paren > 0: 1918 if self._curr.token_type == TokenType.L_PAREN: 1919 paren += 1 1920 if self._curr.token_type == TokenType.R_PAREN: 1921 paren -= 1 1922 end = self._prev 1923 self._advance() 1924 if paren > 0: 1925 self.raise_error("Expecting )", self._curr) 1926 pattern = exp.Var(this=self._find_sql(start, end)) 1927 else: 1928 pattern = None 1929 1930 define = 
( 1931 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1932 ) 1933 self._match_r_paren() 1934 1935 return self.expression( 1936 exp.MatchRecognize, 1937 partition_by=partition, 1938 order=order, 1939 measures=measures, 1940 rows=rows, 1941 after=after, 1942 pattern=pattern, 1943 define=define, 1944 ) 1945 1946 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1947 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1948 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1949 1950 if outer_apply or cross_apply: 1951 this = self._parse_select(table=True) 1952 view = None 1953 outer = not cross_apply 1954 elif self._match(TokenType.LATERAL): 1955 this = self._parse_select(table=True) 1956 view = self._match(TokenType.VIEW) 1957 outer = self._match(TokenType.OUTER) 1958 else: 1959 return None 1960 1961 if not this: 1962 this = self._parse_function() or self._parse_id_var(any_token=False) 1963 while self._match(TokenType.DOT): 1964 this = exp.Dot( 1965 this=this, 1966 expression=self._parse_function() or self._parse_id_var(any_token=False), 1967 ) 1968 1969 table_alias: t.Optional[exp.Expression] 1970 1971 if view: 1972 table = self._parse_id_var(any_token=False) 1973 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 1974 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 1975 else: 1976 table_alias = self._parse_table_alias() 1977 1978 expression = self.expression( 1979 exp.Lateral, 1980 this=this, 1981 view=view, 1982 outer=outer, 1983 alias=table_alias, 1984 ) 1985 1986 if outer_apply or cross_apply: 1987 return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT") 1988 1989 return expression 1990 1991 def _parse_join_side_and_kind( 1992 self, 1993 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 1994 return ( 1995 self._match(TokenType.NATURAL) and self._prev, 1996 
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """One [NATURAL] [side] [kind] JOIN <table> [ON ...|USING (...)], or None."""
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        # NOTE: the `X | Y` union below is legal on older Pythons only because this
        # module uses `from __future__ import annotations` (lazy annotations).
        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """<index> ON [TABLE] <table> (<columns>) as used by CREATE INDEX."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Inline [UNIQUE] [PRIMARY] [AMP] INDEX definition inside CREATE TABLE,
        or None when no INDEX keyword follows."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: LATERAL/APPLY, UNNEST, VALUES, a subquery, or a
        plain (possibly qualified) table name with alias/pivots/hints/sample.

        Each helper below returns None when its introducing token is absent,
        so the order of these attempts defines precedence.
        """
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            # DDL position: a parenthesized column list may follow the name.
            return self._parse_schema(this=this)

        if self.alias_post_tablesample:
            # Some dialects place TABLESAMPLE before the alias.
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        # Pivots may already have been attached by _parse_table_parts.
        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints of the form WITH (hint, ...).
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # Make the sample the outermost node, wrapping the table.
            table_sample.set("this", this)
            this = table_sample

        return this
self._match(TokenType.UNNEST): 2133 return None 2134 2135 expressions = self._parse_wrapped_csv(self._parse_column) 2136 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2137 alias = self._parse_table_alias() 2138 2139 if alias and self.unnest_column_only: 2140 if alias.args.get("columns"): 2141 self.raise_error("Unexpected extra column alias in unnest.") 2142 alias.set("columns", [alias.this]) 2143 alias.set("this", None) 2144 2145 offset = None 2146 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2147 self._match(TokenType.ALIAS) 2148 offset = self._parse_conjunction() 2149 2150 return self.expression( 2151 exp.Unnest, 2152 expressions=expressions, 2153 ordinality=ordinality, 2154 alias=alias, 2155 offset=offset, 2156 ) 2157 2158 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2159 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2160 if not is_derived and not self._match(TokenType.VALUES): 2161 return None 2162 2163 expressions = self._parse_csv(self._parse_value) 2164 2165 if is_derived: 2166 self._match_r_paren() 2167 2168 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2169 2170 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2171 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2172 as_modifier and self._match_text_seq("USING", "SAMPLE") 2173 ): 2174 return None 2175 2176 bucket_numerator = None 2177 bucket_denominator = None 2178 bucket_field = None 2179 percent = None 2180 rows = None 2181 size = None 2182 seed = None 2183 2184 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2185 method = self._parse_var(tokens=(TokenType.ROW,)) 2186 2187 self._match(TokenType.L_PAREN) 2188 2189 num = self._parse_number() 2190 2191 if self._match(TokenType.BUCKET): 2192 bucket_numerator = self._parse_number() 2193 self._match(TokenType.OUT_OF) 2194 
bucket_denominator = bucket_denominator = self._parse_number() 2195 self._match(TokenType.ON) 2196 bucket_field = self._parse_field() 2197 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2198 percent = num 2199 elif self._match(TokenType.ROWS): 2200 rows = num 2201 else: 2202 size = num 2203 2204 self._match(TokenType.R_PAREN) 2205 2206 if self._match(TokenType.L_PAREN): 2207 method = self._parse_var() 2208 seed = self._match(TokenType.COMMA) and self._parse_number() 2209 self._match_r_paren() 2210 elif self._match_texts(("SEED", "REPEATABLE")): 2211 seed = self._parse_wrapped(self._parse_number) 2212 2213 return self.expression( 2214 exp.TableSample, 2215 method=method, 2216 bucket_numerator=bucket_numerator, 2217 bucket_denominator=bucket_denominator, 2218 bucket_field=bucket_field, 2219 percent=percent, 2220 rows=rows, 2221 size=size, 2222 seed=seed, 2223 kind=kind, 2224 ) 2225 2226 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2227 return list(iter(self._parse_pivot, None)) 2228 2229 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2230 index = self._index 2231 2232 if self._match(TokenType.PIVOT): 2233 unpivot = False 2234 elif self._match(TokenType.UNPIVOT): 2235 unpivot = True 2236 else: 2237 return None 2238 2239 expressions = [] 2240 field = None 2241 2242 if not self._match(TokenType.L_PAREN): 2243 self._retreat(index) 2244 return None 2245 2246 if unpivot: 2247 expressions = self._parse_csv(self._parse_column) 2248 else: 2249 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2250 2251 if not self._match(TokenType.FOR): 2252 self.raise_error("Expecting FOR") 2253 2254 value = self._parse_column() 2255 2256 if not self._match(TokenType.IN): 2257 self.raise_error("Expecting IN") 2258 2259 field = self._parse_in(value) 2260 2261 self._match_r_paren() 2262 2263 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2264 2265 if not 
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause into an `exp.Group` node.

        Plain expressions, GROUPING SETS, ROLLUP and CUBE items may appear in
        any order and are collected into per-kind lists; the loop terminates
        once one full pass consumes nothing.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            # WITH ROLLUP / WITH CUBE take no column list; the bare keyword
            # forms take a wrapped csv of columns instead.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                # Nothing was consumed on this pass: the clause is exhausted.
                break

        return self.expression(exp.Group, **elements)  # type: ignore
return self._parse_column() 2323 2324 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2325 if not skip_having_token and not self._match(TokenType.HAVING): 2326 return None 2327 return self.expression(exp.Having, this=self._parse_conjunction()) 2328 2329 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2330 if not self._match(TokenType.QUALIFY): 2331 return None 2332 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2333 2334 def _parse_order( 2335 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2336 ) -> t.Optional[exp.Expression]: 2337 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2338 return this 2339 2340 return self.expression( 2341 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2342 ) 2343 2344 def _parse_sort( 2345 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2346 ) -> t.Optional[exp.Expression]: 2347 if not self._match(token_type): 2348 return None 2349 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2350 2351 def _parse_ordered(self) -> exp.Expression: 2352 this = self._parse_conjunction() 2353 self._match(TokenType.ASC) 2354 is_desc = self._match(TokenType.DESC) 2355 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2356 is_nulls_last = self._match(TokenType.NULLS_LAST) 2357 desc = is_desc or False 2358 asc = not desc 2359 nulls_first = is_nulls_first or False 2360 explicitly_null_ordered = is_nulls_first or is_nulls_last 2361 if ( 2362 not explicitly_null_ordered 2363 and ( 2364 (asc and self.null_ordering == "nulls_are_small") 2365 or (desc and self.null_ordering != "nulls_are_small") 2366 ) 2367 and self.null_ordering != "nulls_are_last" 2368 ): 2369 nulls_first = True 2370 2371 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2372 2373 def _parse_limit( 2374 self, this: t.Optional[exp.Expression] = None, top: bool = False 2375 ) 
-> t.Optional[exp.Expression]: 2376 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2377 limit_paren = self._match(TokenType.L_PAREN) 2378 limit_exp = self.expression( 2379 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2380 ) 2381 2382 if limit_paren: 2383 self._match_r_paren() 2384 2385 return limit_exp 2386 2387 if self._match(TokenType.FETCH): 2388 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2389 direction = self._prev.text if direction else "FIRST" 2390 count = self._parse_number() 2391 self._match_set((TokenType.ROW, TokenType.ROWS)) 2392 self._match(TokenType.ONLY) 2393 return self.expression(exp.Fetch, direction=direction, count=count) 2394 2395 return this 2396 2397 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2398 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2399 return this 2400 2401 count = self._parse_number() 2402 self._match_set((TokenType.ROW, TokenType.ROWS)) 2403 return self.expression(exp.Offset, this=this, expression=count) 2404 2405 def _parse_lock(self) -> t.Optional[exp.Expression]: 2406 if self._match_text_seq("FOR", "UPDATE"): 2407 return self.expression(exp.Lock, update=True) 2408 if self._match_text_seq("FOR", "SHARE"): 2409 return self.expression(exp.Lock, update=False) 2410 2411 return None 2412 2413 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2414 if not self._match_set(self.SET_OPERATIONS): 2415 return this 2416 2417 token_type = self._prev.token_type 2418 2419 if token_type == TokenType.UNION: 2420 expression = exp.Union 2421 elif token_type == TokenType.EXCEPT: 2422 expression = exp.Except 2423 else: 2424 expression = exp.Intersect 2425 2426 return self.expression( 2427 expression, 2428 this=this, 2429 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2430 
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())
2455 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2456 if self._match(TokenType.NOTNULL): 2457 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2458 this = self.expression(exp.Not, this=this) 2459 2460 if negate: 2461 this = self.expression(exp.Not, this=this) 2462 2463 if self._match(TokenType.IS): 2464 this = self._parse_is(this) 2465 2466 return this 2467 2468 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2469 negate = self._match(TokenType.NOT) 2470 if self._match(TokenType.DISTINCT_FROM): 2471 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2472 return self.expression(klass, this=this, expression=self._parse_expression()) 2473 2474 this = self.expression( 2475 exp.Is, 2476 this=this, 2477 expression=self._parse_null() or self._parse_boolean(), 2478 ) 2479 return self.expression(exp.Not, this=this) if negate else this 2480 2481 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2482 unnest = self._parse_unnest() 2483 if unnest: 2484 this = self.expression(exp.In, this=this, unnest=unnest) 2485 elif self._match(TokenType.L_PAREN): 2486 expressions = self._parse_csv(self._parse_select_or_expression) 2487 2488 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2489 this = self.expression(exp.In, this=this, query=expressions[0]) 2490 else: 2491 this = self.expression(exp.In, this=this, expressions=expressions) 2492 2493 self._match_r_paren() 2494 else: 2495 this = self.expression(exp.In, this=this, field=self._parse_field()) 2496 2497 return this 2498 2499 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2500 low = self._parse_bitwise() 2501 self._match(TokenType.AND) 2502 high = self._parse_bitwise() 2503 return self.expression(exp.Between, this=this, low=low, high=high) 2504 2505 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2506 if not self._match(TokenType.ESCAPE): 2507 return this 2508 
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Try to parse a data type at the current position.

        Handles pseudo types, parenthesized parameters (e.g. DECIMAL(10, 2)),
        array suffixes (INT[]), nested generics (ARRAY<...>, STRUCT<...>),
        timestamp/time zone variants, and INTERVAL units. Returns None and
        restores the token position when no type can be parsed here.

        When `check_func` is set, a shape that could equally be a function
        call (type name followed by parenthesized args) is only accepted as a
        type if a string literal follows — otherwise the caller should parse
        it as a function instead.
        """
        index = self._index

        # Teradata-style "SYSUDTLIB." prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                # Empty parens: not a type; rewind everything.
                self._retreat(index)
                return None

            self._match_r_paren()
            # "TYPE(args)" is ambiguous with a function call of the same name.
            maybe_func = True

        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Array suffix syntax, e.g. INT[] or INT[][] — wrap once per "[]".
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" here means this is a subscript, not a type; rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic syntax: ARRAY<...> / MAP<...> / STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the generic, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT [LOCAL] TIME ZONE onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone qualifier resolves the function ambiguity.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            # Only a following string literal (e.g. DATE '2020-01-01')
            # confirms this really is a type and not a function call.
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, folding in trailing column operators:
        dotted qualification (db.table.column), ::-style casts, and
        bracket subscripts.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::<type>" — the operand of the cast operator is a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                # Other mapped operators take the next token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot qualification: star, function call, or identifier.
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the accumulated parts one level: previous column name
                # becomes the table, table becomes db, db becomes catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                # Already fully qualified (or not a column): chain with Dot.
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position, or return None.

        Dispatch order: special no-paren parsers, no-paren builtin functions,
        dialect-specific FUNCTION_PARSERS, subquery predicates (e.g. a
        predicate followed by a SELECT/WITH in parens), then known builders
        from `functions` (defaults to FUNCTIONS) with an `exp.Anonymous`
        fallback. The result is passed through `_parse_window`.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Without a following "(", only no-paren functions qualify
            # (e.g. CURRENT_DATE — see NO_PAREN_FUNCTIONS).
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
index = self._index 2896 2897 if self._match(TokenType.L_PAREN): 2898 expressions = self._parse_csv(self._parse_id_var) 2899 2900 if not self._match(TokenType.R_PAREN): 2901 self._retreat(index) 2902 else: 2903 expressions = [self._parse_id_var()] 2904 2905 if self._match_set(self.LAMBDAS): 2906 return self.LAMBDAS[self._prev.token_type](self, expressions) 2907 2908 self._retreat(index) 2909 2910 this: t.Optional[exp.Expression] 2911 2912 if self._match(TokenType.DISTINCT): 2913 this = self.expression( 2914 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2915 ) 2916 else: 2917 this = self._parse_select_or_expression() 2918 2919 if self._match(TokenType.IGNORE_NULLS): 2920 this = self.expression(exp.IgnoreNulls, this=this) 2921 else: 2922 self._match(TokenType.RESPECT_NULLS) 2923 2924 return self._parse_limit(self._parse_order(this)) 2925 2926 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2927 index = self._index 2928 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2929 self._retreat(index) 2930 return this 2931 2932 args = self._parse_csv( 2933 lambda: self._parse_constraint() 2934 or self._parse_column_def(self._parse_field(any_token=True)) 2935 ) 2936 self._match_r_paren() 2937 return self.expression(exp.Schema, this=this, expressions=args) 2938 2939 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2940 kind = self._parse_types() 2941 2942 if self._match_text_seq("FOR", "ORDINALITY"): 2943 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2944 2945 constraints = [] 2946 while True: 2947 constraint = self._parse_column_constraint() 2948 if not constraint: 2949 break 2950 constraints.append(constraint) 2951 2952 if not kind and not constraints: 2953 return this 2954 2955 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2956 2957 def _parse_auto_increment(self) -> exp.Expression: 
2958 start = None 2959 increment = None 2960 2961 if self._match(TokenType.L_PAREN, advance=False): 2962 args = self._parse_wrapped_csv(self._parse_bitwise) 2963 start = seq_get(args, 0) 2964 increment = seq_get(args, 1) 2965 elif self._match_text_seq("START"): 2966 start = self._parse_bitwise() 2967 self._match_text_seq("INCREMENT") 2968 increment = self._parse_bitwise() 2969 2970 if start and increment: 2971 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2972 2973 return exp.AutoIncrementColumnConstraint() 2974 2975 def _parse_compress(self) -> exp.Expression: 2976 if self._match(TokenType.L_PAREN, advance=False): 2977 return self.expression( 2978 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 2979 ) 2980 2981 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 2982 2983 def _parse_generated_as_identity(self) -> exp.Expression: 2984 if self._match(TokenType.BY_DEFAULT): 2985 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2986 else: 2987 self._match_text_seq("ALWAYS") 2988 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2989 2990 self._match_text_seq("AS", "IDENTITY") 2991 if self._match(TokenType.L_PAREN): 2992 if self._match_text_seq("START", "WITH"): 2993 this.set("start", self._parse_bitwise()) 2994 if self._match_text_seq("INCREMENT", "BY"): 2995 this.set("increment", self._parse_bitwise()) 2996 if self._match_text_seq("MINVALUE"): 2997 this.set("minvalue", self._parse_bitwise()) 2998 if self._match_text_seq("MAXVALUE"): 2999 this.set("maxvalue", self._parse_bitwise()) 3000 3001 if self._match_text_seq("CYCLE"): 3002 this.set("cycle", True) 3003 elif self._match_text_seq("NO", "CYCLE"): 3004 this.set("cycle", False) 3005 3006 self._match_r_paren() 3007 3008 return this 3009 3010 def _parse_inline(self) -> t.Optional[exp.Expression]: 3011 self._match_text_seq("LENGTH") 3012 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3013 3014 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3015 if self._match_text_seq("NULL"): 3016 return self.expression(exp.NotNullColumnConstraint) 3017 if self._match_text_seq("CASESPECIFIC"): 3018 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3019 return None 3020 3021 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3022 this = self._parse_references() 3023 if this: 3024 return this 3025 3026 if self._match(TokenType.CONSTRAINT): 3027 this = self._parse_id_var() 3028 3029 if self._match_texts(self.CONSTRAINT_PARSERS): 3030 return self.expression( 3031 exp.ColumnConstraint, 3032 this=this, 3033 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3034 ) 3035 3036 return this 3037 3038 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3039 if not self._match(TokenType.CONSTRAINT): 3040 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3041 3042 this = self._parse_id_var() 3043 expressions = [] 3044 3045 while True: 3046 constraint = self._parse_unnamed_constraint() or self._parse_function() 3047 if not constraint: 3048 break 3049 expressions.append(constraint) 3050 3051 return self.expression(exp.Constraint, this=this, expressions=expressions) 3052 3053 def _parse_unnamed_constraint( 3054 self, constraints: t.Optional[t.Collection[str]] = None 3055 ) -> t.Optional[exp.Expression]: 3056 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3057 return None 3058 3059 constraint = self._prev.text.upper() 3060 if constraint not in self.CONSTRAINT_PARSERS: 3061 self.raise_error(f"No parser found for schema constraint {constraint}.") 3062 3063 return self.CONSTRAINT_PARSERS[constraint](self) 3064 3065 def _parse_unique(self) -> exp.Expression: 3066 if not self._match(TokenType.L_PAREN, advance=False): 3067 return self.expression(exp.UniqueColumnConstraint) 3068 return 
self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 3069 3070 def _parse_key_constraint_options(self) -> t.List[str]: 3071 options = [] 3072 while True: 3073 if not self._curr: 3074 break 3075 3076 if self._match(TokenType.ON): 3077 action = None 3078 on = self._advance_any() and self._prev.text 3079 3080 if self._match(TokenType.NO_ACTION): 3081 action = "NO ACTION" 3082 elif self._match(TokenType.CASCADE): 3083 action = "CASCADE" 3084 elif self._match_pair(TokenType.SET, TokenType.NULL): 3085 action = "SET NULL" 3086 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3087 action = "SET DEFAULT" 3088 else: 3089 self.raise_error("Invalid key constraint") 3090 3091 options.append(f"ON {on} {action}") 3092 elif self._match_text_seq("NOT", "ENFORCED"): 3093 options.append("NOT ENFORCED") 3094 elif self._match_text_seq("DEFERRABLE"): 3095 options.append("DEFERRABLE") 3096 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3097 options.append("INITIALLY DEFERRED") 3098 elif self._match_text_seq("NORELY"): 3099 options.append("NORELY") 3100 elif self._match_text_seq("MATCH", "FULL"): 3101 options.append("MATCH FULL") 3102 else: 3103 break 3104 3105 return options 3106 3107 def _parse_references(self) -> t.Optional[exp.Expression]: 3108 if not self._match(TokenType.REFERENCES): 3109 return None 3110 3111 expressions = None 3112 this = self._parse_id_var() 3113 3114 if self._match(TokenType.L_PAREN, advance=False): 3115 expressions = self._parse_wrapped_id_vars() 3116 3117 options = self._parse_key_constraint_options() 3118 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3119 3120 def _parse_foreign_key(self) -> exp.Expression: 3121 expressions = self._parse_wrapped_id_vars() 3122 reference = self._parse_references() 3123 options = {} 3124 3125 while self._match(TokenType.ON): 3126 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3127 self.raise_error("Expected DELETE or UPDATE") 3128 3129 
kind = self._prev.text.lower() 3130 3131 if self._match(TokenType.NO_ACTION): 3132 action = "NO ACTION" 3133 elif self._match(TokenType.SET): 3134 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3135 action = "SET " + self._prev.text.upper() 3136 else: 3137 self._advance() 3138 action = self._prev.text.upper() 3139 3140 options[kind] = action 3141 3142 return self.expression( 3143 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3144 ) 3145 3146 def _parse_primary_key(self) -> exp.Expression: 3147 desc = ( 3148 self._match_set((TokenType.ASC, TokenType.DESC)) 3149 and self._prev.token_type == TokenType.DESC 3150 ) 3151 3152 if not self._match(TokenType.L_PAREN, advance=False): 3153 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3154 3155 expressions = self._parse_wrapped_id_vars() 3156 options = self._parse_key_constraint_options() 3157 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3158 3159 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3160 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3161 return this 3162 3163 bracket_kind = self._prev.token_type 3164 expressions: t.List[t.Optional[exp.Expression]] 3165 3166 if self._match(TokenType.COLON): 3167 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3168 else: 3169 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3170 3171 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3172 if bracket_kind == TokenType.L_BRACE: 3173 this = self.expression(exp.Struct, expressions=expressions) 3174 elif not this or this.name.upper() == "ARRAY": 3175 this = self.expression(exp.Array, expressions=expressions) 3176 else: 3177 expressions = apply_index_offset(expressions, -self.index_offset) 3178 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3179 3180 if not 
self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3181 self.raise_error("Expected ]") 3182 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3183 self.raise_error("Expected }") 3184 3185 this.comments = self._prev_comments 3186 return self._parse_bracket(this) 3187 3188 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3189 if self._match(TokenType.COLON): 3190 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3191 return this 3192 3193 def _parse_case(self) -> t.Optional[exp.Expression]: 3194 ifs = [] 3195 default = None 3196 3197 expression = self._parse_conjunction() 3198 3199 while self._match(TokenType.WHEN): 3200 this = self._parse_conjunction() 3201 self._match(TokenType.THEN) 3202 then = self._parse_conjunction() 3203 ifs.append(self.expression(exp.If, this=this, true=then)) 3204 3205 if self._match(TokenType.ELSE): 3206 default = self._parse_conjunction() 3207 3208 if not self._match(TokenType.END): 3209 self.raise_error("Expected END after CASE", self._prev) 3210 3211 return self._parse_window( 3212 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3213 ) 3214 3215 def _parse_if(self) -> t.Optional[exp.Expression]: 3216 if self._match(TokenType.L_PAREN): 3217 args = self._parse_csv(self._parse_conjunction) 3218 this = exp.If.from_arg_list(args) 3219 self.validate_expression(this, args) 3220 self._match_r_paren() 3221 else: 3222 condition = self._parse_conjunction() 3223 self._match(TokenType.THEN) 3224 true = self._parse_conjunction() 3225 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3226 self._match(TokenType.END) 3227 this = self.expression(exp.If, this=condition, true=true, false=false) 3228 3229 return self._parse_window(this) 3230 3231 def _parse_extract(self) -> exp.Expression: 3232 this = self._parse_function() or self._parse_var() or self._parse_type() 3233 3234 if 
self._match(TokenType.FROM): 3235 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3236 3237 if not self._match(TokenType.COMMA): 3238 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3239 3240 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3241 3242 def _parse_cast(self, strict: bool) -> exp.Expression: 3243 this = self._parse_conjunction() 3244 3245 if not self._match(TokenType.ALIAS): 3246 self.raise_error("Expected AS after CAST") 3247 3248 to = self._parse_types() 3249 3250 if not to: 3251 self.raise_error("Expected TYPE after CAST") 3252 elif to.this == exp.DataType.Type.CHAR: 3253 if self._match(TokenType.CHARACTER_SET): 3254 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3255 3256 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3257 3258 def _parse_string_agg(self) -> exp.Expression: 3259 expression: t.Optional[exp.Expression] 3260 3261 if self._match(TokenType.DISTINCT): 3262 args = self._parse_csv(self._parse_conjunction) 3263 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3264 else: 3265 args = self._parse_csv(self._parse_conjunction) 3266 expression = seq_get(args, 0) 3267 3268 index = self._index 3269 if not self._match(TokenType.R_PAREN): 3270 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3271 order = self._parse_order(this=expression) 3272 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3273 3274 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3275 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3276 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3277 if not self._match(TokenType.WITHIN_GROUP): 3278 self._retreat(index) 3279 this = exp.GroupConcat.from_arg_list(args) 3280 self.validate_expression(this, args) 3281 return this 3282 3283 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3284 order = self._parse_order(this=expression) 3285 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3286 3287 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3288 to: t.Optional[exp.Expression] 3289 this = self._parse_column() 3290 3291 if self._match(TokenType.USING): 3292 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3293 elif self._match(TokenType.COMMA): 3294 to = self._parse_types() 3295 else: 3296 to = None 3297 3298 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3299 3300 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3301 args = self._parse_csv(self._parse_bitwise) 3302 3303 if self._match(TokenType.IN): 3304 return self.expression( 3305 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3306 ) 3307 3308 if haystack_first: 3309 haystack = seq_get(args, 0) 3310 needle = seq_get(args, 1) 3311 else: 3312 needle = seq_get(args, 0) 3313 haystack = seq_get(args, 1) 3314 3315 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3316 3317 self.validate_expression(this, args) 3318 3319 return this 3320 3321 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3322 args = self._parse_csv(self._parse_table) 3323 return exp.JoinHint(this=func_name.upper(), expressions=args) 3324 3325 def _parse_substring(self) -> exp.Expression: 3326 # Postgres supports the form: substring(string [from int] [for int]) 3327 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3328 3329 args = self._parse_csv(self._parse_bitwise) 3330 3331 if self._match(TokenType.FROM): 3332 
args.append(self._parse_bitwise()) 3333 if self._match(TokenType.FOR): 3334 args.append(self._parse_bitwise()) 3335 3336 this = exp.Substring.from_arg_list(args) 3337 self.validate_expression(this, args) 3338 3339 return this 3340 3341 def _parse_trim(self) -> exp.Expression: 3342 # https://www.w3resource.com/sql/character-functions/trim.php 3343 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3344 3345 position = None 3346 collation = None 3347 3348 if self._match_set(self.TRIM_TYPES): 3349 position = self._prev.text.upper() 3350 3351 expression = self._parse_term() 3352 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3353 this = self._parse_term() 3354 else: 3355 this = expression 3356 expression = None 3357 3358 if self._match(TokenType.COLLATE): 3359 collation = self._parse_term() 3360 3361 return self.expression( 3362 exp.Trim, 3363 this=this, 3364 position=position, 3365 expression=expression, 3366 collation=collation, 3367 ) 3368 3369 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3370 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3371 3372 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3373 return self._parse_window(self._parse_id_var(), alias=True) 3374 3375 def _parse_window( 3376 self, this: t.Optional[exp.Expression], alias: bool = False 3377 ) -> t.Optional[exp.Expression]: 3378 if self._match(TokenType.FILTER): 3379 where = self._parse_wrapped(self._parse_where) 3380 this = self.expression(exp.Filter, this=this, expression=where) 3381 3382 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3383 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3384 if self._match(TokenType.WITHIN_GROUP): 3385 order = self._parse_wrapped(self._parse_order) 3386 this = self.expression(exp.WithinGroup, this=this, expression=order) 3387 3388 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3389 # Some dialects choose to implement and some do not. 3390 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3391 3392 # There is some code above in _parse_lambda that handles 3393 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3394 3395 # The below changes handle 3396 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3397 3398 # Oracle allows both formats 3399 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3400 # and Snowflake chose to do the same for familiarity 3401 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3402 if self._match(TokenType.IGNORE_NULLS): 3403 this = self.expression(exp.IgnoreNulls, this=this) 3404 elif self._match(TokenType.RESPECT_NULLS): 3405 this = self.expression(exp.RespectNulls, this=this) 3406 3407 # bigquery select from window x AS (partition by ...) 
3408 if alias: 3409 self._match(TokenType.ALIAS) 3410 elif not self._match(TokenType.OVER): 3411 return this 3412 3413 if not self._match(TokenType.L_PAREN): 3414 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3415 3416 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3417 partition = self._parse_partition_by() 3418 order = self._parse_order() 3419 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3420 3421 if kind: 3422 self._match(TokenType.BETWEEN) 3423 start = self._parse_window_spec() 3424 self._match(TokenType.AND) 3425 end = self._parse_window_spec() 3426 3427 spec = self.expression( 3428 exp.WindowSpec, 3429 kind=kind, 3430 start=start["value"], 3431 start_side=start["side"], 3432 end=end["value"], 3433 end_side=end["side"], 3434 ) 3435 else: 3436 spec = None 3437 3438 self._match_r_paren() 3439 3440 return self.expression( 3441 exp.Window, 3442 this=this, 3443 partition_by=partition, 3444 order=order, 3445 spec=spec, 3446 alias=window_alias, 3447 ) 3448 3449 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3450 self._match(TokenType.BETWEEN) 3451 3452 return { 3453 "value": ( 3454 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3455 ) 3456 or self._parse_bitwise(), 3457 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3458 } 3459 3460 def _parse_alias( 3461 self, this: t.Optional[exp.Expression], explicit: bool = False 3462 ) -> t.Optional[exp.Expression]: 3463 any_token = self._match(TokenType.ALIAS) 3464 3465 if explicit and not any_token: 3466 return this 3467 3468 if self._match(TokenType.L_PAREN): 3469 aliases = self.expression( 3470 exp.Aliases, 3471 this=this, 3472 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3473 ) 3474 self._match_r_paren(aliases) 3475 return aliases 3476 3477 alias = self._parse_id_var(any_token) 3478 3479 if 
alias: 3480 return self.expression(exp.Alias, this=this, alias=alias) 3481 3482 return this 3483 3484 def _parse_id_var( 3485 self, 3486 any_token: bool = True, 3487 tokens: t.Optional[t.Collection[TokenType]] = None, 3488 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3489 ) -> t.Optional[exp.Expression]: 3490 identifier = self._parse_identifier() 3491 3492 if identifier: 3493 return identifier 3494 3495 prefix = "" 3496 3497 if prefix_tokens: 3498 while self._match_set(prefix_tokens): 3499 prefix += self._prev.text 3500 3501 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3502 quoted = self._prev.token_type == TokenType.STRING 3503 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3504 3505 return None 3506 3507 def _parse_string(self) -> t.Optional[exp.Expression]: 3508 if self._match(TokenType.STRING): 3509 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3510 return self._parse_placeholder() 3511 3512 def _parse_number(self) -> t.Optional[exp.Expression]: 3513 if self._match(TokenType.NUMBER): 3514 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3515 return self._parse_placeholder() 3516 3517 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3518 if self._match(TokenType.IDENTIFIER): 3519 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3520 return self._parse_placeholder() 3521 3522 def _parse_var( 3523 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 3524 ) -> t.Optional[exp.Expression]: 3525 if ( 3526 (any_token and self._advance_any()) 3527 or self._match(TokenType.VAR) 3528 or (self._match_set(tokens) if tokens else False) 3529 ): 3530 return self.expression(exp.Var, this=self._prev.text) 3531 return self._parse_placeholder() 3532 3533 def _advance_any(self) -> t.Optional[Token]: 3534 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3535 self._advance() 3536 return 
self._prev 3537 return None 3538 3539 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3540 return self._parse_var() or self._parse_string() 3541 3542 def _parse_null(self) -> t.Optional[exp.Expression]: 3543 if self._match(TokenType.NULL): 3544 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3545 return None 3546 3547 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3548 if self._match(TokenType.TRUE): 3549 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3550 if self._match(TokenType.FALSE): 3551 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3552 return None 3553 3554 def _parse_star(self) -> t.Optional[exp.Expression]: 3555 if self._match(TokenType.STAR): 3556 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3557 return None 3558 3559 def _parse_parameter(self) -> exp.Expression: 3560 wrapped = self._match(TokenType.L_BRACE) 3561 this = self._parse_var() or self._parse_primary() 3562 self._match(TokenType.R_BRACE) 3563 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3564 3565 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3566 if self._match_set(self.PLACEHOLDER_PARSERS): 3567 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3568 if placeholder: 3569 return placeholder 3570 self._advance(-1) 3571 return None 3572 3573 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3574 if not self._match(TokenType.EXCEPT): 3575 return None 3576 if self._match(TokenType.L_PAREN, advance=False): 3577 return self._parse_wrapped_csv(self._parse_column) 3578 return self._parse_csv(self._parse_column) 3579 3580 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3581 if not self._match(TokenType.REPLACE): 3582 return None 3583 if self._match(TokenType.L_PAREN, advance=False): 3584 return self._parse_wrapped_csv(self._parse_expression) 3585 return self._parse_csv(self._parse_expression) 3586 3587 def 
_parse_csv( 3588 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3589 ) -> t.List[t.Optional[exp.Expression]]: 3590 parse_result = parse_method() 3591 items = [parse_result] if parse_result is not None else [] 3592 3593 while self._match(sep): 3594 if parse_result and self._prev_comments: 3595 parse_result.comments = self._prev_comments 3596 3597 parse_result = parse_method() 3598 if parse_result is not None: 3599 items.append(parse_result) 3600 3601 return items 3602 3603 def _parse_tokens( 3604 self, parse_method: t.Callable, expressions: t.Dict 3605 ) -> t.Optional[exp.Expression]: 3606 this = parse_method() 3607 3608 while self._match_set(expressions): 3609 this = self.expression( 3610 expressions[self._prev.token_type], 3611 this=this, 3612 comments=self._prev_comments, 3613 expression=parse_method(), 3614 ) 3615 3616 return this 3617 3618 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3619 return self._parse_wrapped_csv(self._parse_id_var) 3620 3621 def _parse_wrapped_csv( 3622 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3623 ) -> t.List[t.Optional[exp.Expression]]: 3624 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3625 3626 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3627 self._match_l_paren() 3628 parse_result = parse_method() 3629 self._match_r_paren() 3630 return parse_result 3631 3632 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3633 return self._parse_select() or self._parse_expression() 3634 3635 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3636 return self._parse_set_operations( 3637 self._parse_select(nested=True, parse_subquery_alias=False) 3638 ) 3639 3640 def _parse_transaction(self) -> exp.Expression: 3641 this = None 3642 if self._match_texts(self.TRANSACTION_KIND): 3643 this = self._prev.text 3644 3645 self._match_texts({"TRANSACTION", "WORK"}) 3646 3647 modes = [] 3648 while True: 3649 mode = [] 
3650 while self._match(TokenType.VAR): 3651 mode.append(self._prev.text) 3652 3653 if mode: 3654 modes.append(" ".join(mode)) 3655 if not self._match(TokenType.COMMA): 3656 break 3657 3658 return self.expression(exp.Transaction, this=this, modes=modes) 3659 3660 def _parse_commit_or_rollback(self) -> exp.Expression: 3661 chain = None 3662 savepoint = None 3663 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3664 3665 self._match_texts({"TRANSACTION", "WORK"}) 3666 3667 if self._match_text_seq("TO"): 3668 self._match_text_seq("SAVEPOINT") 3669 savepoint = self._parse_id_var() 3670 3671 if self._match(TokenType.AND): 3672 chain = not self._match_text_seq("NO") 3673 self._match_text_seq("CHAIN") 3674 3675 if is_rollback: 3676 return self.expression(exp.Rollback, savepoint=savepoint) 3677 return self.expression(exp.Commit, chain=chain) 3678 3679 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3680 if not self._match_text_seq("ADD"): 3681 return None 3682 3683 self._match(TokenType.COLUMN) 3684 exists_column = self._parse_exists(not_=True) 3685 expression = self._parse_column_def(self._parse_field(any_token=True)) 3686 3687 if expression: 3688 expression.set("exists", exists_column) 3689 3690 return expression 3691 3692 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3693 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3694 3695 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3696 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3697 return self.expression( 3698 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3699 ) 3700 3701 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3702 this = None 3703 kind = self._prev.token_type 3704 3705 if kind == TokenType.CONSTRAINT: 3706 this = self._parse_id_var() 3707 3708 if self._match_text_seq("CHECK"): 3709 expression = 
self._parse_wrapped(self._parse_conjunction) 3710 enforced = self._match_text_seq("ENFORCED") 3711 3712 return self.expression( 3713 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3714 ) 3715 3716 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3717 expression = self._parse_foreign_key() 3718 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3719 expression = self._parse_primary_key() 3720 3721 return self.expression(exp.AddConstraint, this=this, expression=expression) 3722 3723 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3724 index = self._index - 1 3725 3726 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3727 return self._parse_csv(self._parse_add_constraint) 3728 3729 self._retreat(index) 3730 return self._parse_csv(self._parse_add_column) 3731 3732 def _parse_alter_table_alter(self) -> exp.Expression: 3733 self._match(TokenType.COLUMN) 3734 column = self._parse_field(any_token=True) 3735 3736 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3737 return self.expression(exp.AlterColumn, this=column, drop=True) 3738 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3739 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 3740 3741 self._match_text_seq("SET", "DATA") 3742 return self.expression( 3743 exp.AlterColumn, 3744 this=column, 3745 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3746 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3747 using=self._match(TokenType.USING) and self._parse_conjunction(), 3748 ) 3749 3750 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 3751 index = self._index - 1 3752 3753 partition_exists = self._parse_exists() 3754 if self._match(TokenType.PARTITION, advance=False): 3755 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3756 3757 self._retreat(index) 3758 return 
self._parse_csv(self._parse_drop_column) 3759 3760 def _parse_alter_table_rename(self) -> exp.Expression: 3761 self._match_text_seq("TO") 3762 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3763 3764 def _parse_alter(self) -> t.Optional[exp.Expression]: 3765 start = self._prev 3766 3767 if not self._match(TokenType.TABLE): 3768 return self._parse_as_command(start) 3769 3770 exists = self._parse_exists() 3771 this = self._parse_table(schema=True) 3772 3773 if self._next: 3774 self._advance() 3775 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 3776 3777 if parser: 3778 return self.expression( 3779 exp.AlterTable, 3780 this=this, 3781 exists=exists, 3782 actions=ensure_list(parser(self)), 3783 ) 3784 return self._parse_as_command(start) 3785 3786 def _parse_show(self) -> t.Optional[exp.Expression]: 3787 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3788 if parser: 3789 return parser(self) 3790 self._advance() 3791 return self.expression(exp.Show, this=self._prev.text.upper()) 3792 3793 def _default_parse_set_item(self) -> exp.Expression: 3794 return self.expression( 3795 exp.SetItem, 3796 this=self._parse_statement(), 3797 ) 3798 3799 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3800 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3801 return parser(self) if parser else self._default_parse_set_item() 3802 3803 def _parse_merge(self) -> exp.Expression: 3804 self._match(TokenType.INTO) 3805 target = self._parse_table() 3806 3807 self._match(TokenType.USING) 3808 using = self._parse_table() 3809 3810 self._match(TokenType.ON) 3811 on = self._parse_conjunction() 3812 3813 whens = [] 3814 while self._match(TokenType.WHEN): 3815 matched = not self._match(TokenType.NOT) 3816 self._match_text_seq("MATCHED") 3817 source = ( 3818 False 3819 if self._match_text_seq("BY", "TARGET") 3820 else self._match_text_seq("BY", "SOURCE") 3821 ) 3822 
condition = self._parse_conjunction() if self._match(TokenType.AND) else None 3823 3824 self._match(TokenType.THEN) 3825 3826 if self._match(TokenType.INSERT): 3827 _this = self._parse_star() 3828 if _this: 3829 then = self.expression(exp.Insert, this=_this) 3830 else: 3831 then = self.expression( 3832 exp.Insert, 3833 this=self._parse_value(), 3834 expression=self._match(TokenType.VALUES) and self._parse_value(), 3835 ) 3836 elif self._match(TokenType.UPDATE): 3837 expressions = self._parse_star() 3838 if expressions: 3839 then = self.expression(exp.Update, expressions=expressions) 3840 else: 3841 then = self.expression( 3842 exp.Update, 3843 expressions=self._match(TokenType.SET) 3844 and self._parse_csv(self._parse_equality), 3845 ) 3846 elif self._match(TokenType.DELETE): 3847 then = self.expression(exp.Var, this=self._prev.text) 3848 else: 3849 then = None 3850 3851 whens.append( 3852 self.expression( 3853 exp.When, 3854 matched=matched, 3855 source=source, 3856 condition=condition, 3857 then=then, 3858 ) 3859 ) 3860 3861 return self.expression( 3862 exp.Merge, 3863 this=target, 3864 using=using, 3865 on=on, 3866 expressions=whens, 3867 ) 3868 3869 def _parse_set(self) -> exp.Expression: 3870 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3871 3872 def _parse_as_command(self, start: Token) -> exp.Command: 3873 while self._curr: 3874 self._advance() 3875 text = self._find_sql(start, self._prev) 3876 size = len(start.text) 3877 return exp.Command(this=text[:size], expression=text[size:]) 3878 3879 def _find_parser( 3880 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3881 ) -> t.Optional[t.Callable]: 3882 index = self._index 3883 this = [] 3884 while True: 3885 # The current token might be multiple words 3886 curr = self._curr.text.upper() 3887 key = curr.split(" ") 3888 this.append(curr) 3889 self._advance() 3890 result, trie = in_trie(trie, key) 3891 if result == 0: 3892 break 3893 if result == 2: 3894 subparser = 
parsers[" ".join(this)] 3895 return subparser 3896 self._retreat(index) 3897 return None 3898 3899 def _match(self, token_type, advance=True): 3900 if not self._curr: 3901 return None 3902 3903 if self._curr.token_type == token_type: 3904 if advance: 3905 self._advance() 3906 return True 3907 3908 return None 3909 3910 def _match_set(self, types, advance=True): 3911 if not self._curr: 3912 return None 3913 3914 if self._curr.token_type in types: 3915 if advance: 3916 self._advance() 3917 return True 3918 3919 return None 3920 3921 def _match_pair(self, token_type_a, token_type_b, advance=True): 3922 if not self._curr or not self._next: 3923 return None 3924 3925 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3926 if advance: 3927 self._advance(2) 3928 return True 3929 3930 return None 3931 3932 def _match_l_paren(self, expression=None): 3933 if not self._match(TokenType.L_PAREN): 3934 self.raise_error("Expecting (") 3935 if expression and self._prev_comments: 3936 expression.comments = self._prev_comments 3937 3938 def _match_r_paren(self, expression=None): 3939 if not self._match(TokenType.R_PAREN): 3940 self.raise_error("Expecting )") 3941 if expression and self._prev_comments: 3942 expression.comments = self._prev_comments 3943 3944 def _match_texts(self, texts, advance=True): 3945 if self._curr and self._curr.text.upper() in texts: 3946 if advance: 3947 self._advance() 3948 return True 3949 return False 3950 3951 def _match_text_seq(self, *texts, advance=True): 3952 index = self._index 3953 for text in texts: 3954 if self._curr and self._curr.text.upper() == text: 3955 self._advance() 3956 else: 3957 self._retreat(index) 3958 return False 3959 3960 if not advance: 3961 self._retreat(index) 3962 3963 return True 3964 3965 def _replace_columns_with_dots(self, this): 3966 if isinstance(this, exp.Dot): 3967 exp.replace_children(this, self._replace_columns_with_dots) 3968 elif isinstance(this, exp.Column): 3969 
exp.replace_children(this, self._replace_columns_with_dots) 3970 table = this.args.get("table") 3971 this = ( 3972 self.expression(exp.Dot, this=table, expression=this.this) 3973 if table 3974 else self.expression(exp.Var, this=this.name) 3975 ) 3976 elif isinstance(this, exp.Identifier): 3977 this = self.expression(exp.Var, this=this.name) 3978 return this 3979 3980 def _replace_lambda(self, node, lambda_variables): 3981 if isinstance(node, exp.Column): 3982 if node.name in lambda_variables: 3983 return node.this 3984 return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer
and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
733 def __init__( 734 self, 735 error_level: t.Optional[ErrorLevel] = None, 736 error_message_context: int = 100, 737 index_offset: int = 0, 738 unnest_column_only: bool = False, 739 alias_post_tablesample: bool = False, 740 max_errors: int = 3, 741 null_ordering: t.Optional[str] = None, 742 ): 743 self.error_level = error_level or ErrorLevel.IMMEDIATE 744 self.error_message_context = error_message_context 745 self.index_offset = index_offset 746 self.unnest_column_only = unnest_column_only 747 self.alias_post_tablesample = alias_post_tablesample 748 self.max_errors = max_errors 749 self.null_ordering = null_ordering 750 self.reset()
762 def parse( 763 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 764 ) -> t.List[t.Optional[exp.Expression]]: 765 """ 766 Parses a list of tokens and returns a list of syntax trees, one tree 767 per parsed SQL statement. 768 769 Args: 770 raw_tokens: the list of tokens. 771 sql: the original SQL string, used to produce helpful debug messages. 772 773 Returns: 774 The list of syntax trees. 775 """ 776 return self._parse( 777 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 778 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
780 def parse_into( 781 self, 782 expression_types: exp.IntoType, 783 raw_tokens: t.List[Token], 784 sql: t.Optional[str] = None, 785 ) -> t.List[t.Optional[exp.Expression]]: 786 """ 787 Parses a list of tokens into a given Expression type. If a collection of Expression 788 types is given instead, this method will try to parse the token list into each one 789 of them, stopping at the first for which the parsing succeeds. 790 791 Args: 792 expression_types: the expression type(s) to try and parse the token list into. 793 raw_tokens: the list of tokens. 794 sql: the original SQL string, used to produce helpful debug messages. 795 796 Returns: 797 The target Expression. 798 """ 799 errors = [] 800 for expression_type in ensure_collection(expression_types): 801 parser = self.EXPRESSION_PARSERS.get(expression_type) 802 if not parser: 803 raise TypeError(f"No parser registered for {expression_type}") 804 try: 805 return self._parse(parser, raw_tokens, sql) 806 except ParseError as e: 807 e.errors[0]["into_expression"] = expression_type 808 errors.append(e) 809 raise ParseError( 810 f"Failed to parse into {expression_types}", 811 errors=merge_errors(errors), 812 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
848 def check_errors(self) -> None: 849 """ 850 Logs or raises any found errors, depending on the chosen error level setting. 851 """ 852 if self.error_level == ErrorLevel.WARN: 853 for error in self.errors: 854 logger.error(str(error)) 855 elif self.error_level == ErrorLevel.RAISE and self.errors: 856 raise ParseError( 857 concat_messages(self.errors, self.max_errors), 858 errors=merge_errors(self.errors), 859 )
Logs or raises any found errors, depending on the chosen error level setting.
861 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 862 """ 863 Appends an error in the list of recorded errors or raises it, depending on the chosen 864 error level setting. 865 """ 866 token = token or self._curr or self._prev or Token.string("") 867 start = self._find_token(token) 868 end = start + len(token.text) 869 start_context = self.sql[max(start - self.error_message_context, 0) : start] 870 highlight = self.sql[start:end] 871 end_context = self.sql[end : end + self.error_message_context] 872 873 error = ParseError.new( 874 f"{message}. Line {token.line}, Col: {token.col}.\n" 875 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 876 description=message, 877 line=token.line, 878 col=token.col, 879 start_context=start_context, 880 highlight=highlight, 881 end_context=end_context, 882 ) 883 884 if self.error_level == ErrorLevel.IMMEDIATE: 885 raise error 886 887 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
889 def expression( 890 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 891 ) -> exp.Expression: 892 """ 893 Creates a new, validated Expression. 894 895 Args: 896 exp_class: the expression class to instantiate. 897 comments: an optional list of comments to attach to the expression. 898 kwargs: the arguments to set for the expression along with their respective values. 899 900 Returns: 901 The target expression. 902 """ 903 instance = exp_class(**kwargs) 904 if self._prev_comments: 905 instance.comments = self._prev_comments 906 self._prev_comments = None 907 if comments: 908 instance.comments = comments 909 self.validate_expression(instance) 910 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
912 def validate_expression( 913 self, expression: exp.Expression, args: t.Optional[t.List] = None 914 ) -> None: 915 """ 916 Validates an already instantiated expression, making sure that all its mandatory arguments 917 are set. 918 919 Args: 920 expression: the expression to validate. 921 args: an optional list of items that was used to instantiate the expression, if it's a Func. 922 """ 923 if self.error_level == ErrorLevel.IGNORE: 924 return 925 926 for error_message in expression.error_messages(args): 927 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.