# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import ( 10 apply_index_offset, 11 count_params, 12 ensure_collection, 13 ensure_list, 14 seq_get, 15) 16from sqlglot.tokens import Token, Tokenizer, TokenType 17from sqlglot.trie import in_trie, new_trie 18 19logger = logging.getLogger("sqlglot") 20 21 22def parse_var_map(args): 23 keys = [] 24 values = [] 25 for i in range(0, len(args), 2): 26 keys.append(args[i]) 27 values.append(args[i + 1]) 28 return exp.VarMap( 29 keys=exp.Array(expressions=keys), 30 values=exp.Array(expressions=values), 31 ) 32 33 34def binary_range_parser( 35 expr_type: t.Type[exp.Expression], 36) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 37 return lambda self, this: self._parse_escape( 38 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 39 ) 40 41 42class _Parser(type): 43 def __new__(cls, clsname, bases, attrs): 44 klass = super().__new__(cls, clsname, bases, attrs) 45 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 46 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 47 48 if not klass.INTEGER_DIVISION: 49 klass.FACTOR = {**klass.FACTOR, TokenType.SLASH: exp.FloatDiv} 50 51 return klass 52 53 54class Parser(metaclass=_Parser): 55 """ 56 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 57 a parsed syntax tree. 58 59 Args: 60 error_level: the desired error level. 61 Default: ErrorLevel.RAISE 62 error_message_context: determines the amount of context to capture from a 63 query string when displaying the error message (in number of characters). 64 Default: 50. 65 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 
66 Default: 0 67 alias_post_tablesample: If the table alias comes after tablesample. 68 Default: False 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 null_ordering: Indicates the default null ordering method to use if not explicitly set. 73 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 74 Default: "nulls_are_small" 75 """ 76 77 FUNCTIONS: t.Dict[str, t.Callable] = { 78 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 79 "DATE_TO_DATE_STR": lambda args: exp.Cast( 80 this=seq_get(args, 0), 81 to=exp.DataType(this=exp.DataType.Type.TEXT), 82 ), 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 "IFNULL": exp.Coalesce.from_arg_list, 97 } 98 99 NO_PAREN_FUNCTIONS = { 100 TokenType.CURRENT_DATE: exp.CurrentDate, 101 TokenType.CURRENT_DATETIME: exp.CurrentDate, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.STRUCT, 109 TokenType.NULLABLE, 110 } 111 112 TYPE_TOKENS = { 113 TokenType.BIT, 114 TokenType.BOOLEAN, 115 TokenType.TINYINT, 116 TokenType.SMALLINT, 117 TokenType.INT, 118 TokenType.BIGINT, 119 TokenType.FLOAT, 120 TokenType.DOUBLE, 121 TokenType.CHAR, 122 TokenType.NCHAR, 123 TokenType.VARCHAR, 124 TokenType.NVARCHAR, 125 TokenType.TEXT, 126 TokenType.MEDIUMTEXT, 127 TokenType.LONGTEXT, 128 TokenType.MEDIUMBLOB, 129 TokenType.LONGBLOB, 130 TokenType.BINARY, 131 TokenType.VARBINARY, 132 TokenType.JSON, 133 TokenType.JSONB, 134 TokenType.INTERVAL, 135 TokenType.TIME, 136 
TokenType.TIMESTAMP, 137 TokenType.TIMESTAMPTZ, 138 TokenType.TIMESTAMPLTZ, 139 TokenType.DATETIME, 140 TokenType.DATE, 141 TokenType.DECIMAL, 142 TokenType.UUID, 143 TokenType.GEOGRAPHY, 144 TokenType.GEOMETRY, 145 TokenType.HLLSKETCH, 146 TokenType.HSTORE, 147 TokenType.PSEUDO_TYPE, 148 TokenType.SUPER, 149 TokenType.SERIAL, 150 TokenType.SMALLSERIAL, 151 TokenType.BIGSERIAL, 152 TokenType.XML, 153 TokenType.UNIQUEIDENTIFIER, 154 TokenType.MONEY, 155 TokenType.SMALLMONEY, 156 TokenType.ROWVERSION, 157 TokenType.IMAGE, 158 TokenType.VARIANT, 159 TokenType.OBJECT, 160 TokenType.INET, 161 *NESTED_TYPE_TOKENS, 162 } 163 164 SUBQUERY_PREDICATES = { 165 TokenType.ANY: exp.Any, 166 TokenType.ALL: exp.All, 167 TokenType.EXISTS: exp.Exists, 168 TokenType.SOME: exp.Any, 169 } 170 171 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 172 173 DB_CREATABLES = { 174 TokenType.DATABASE, 175 TokenType.SCHEMA, 176 TokenType.TABLE, 177 TokenType.VIEW, 178 } 179 180 CREATABLES = { 181 TokenType.COLUMN, 182 TokenType.FUNCTION, 183 TokenType.INDEX, 184 TokenType.PROCEDURE, 185 *DB_CREATABLES, 186 } 187 188 ID_VAR_TOKENS = { 189 TokenType.VAR, 190 TokenType.ANTI, 191 TokenType.APPLY, 192 TokenType.AUTO_INCREMENT, 193 TokenType.BEGIN, 194 TokenType.BOTH, 195 TokenType.BUCKET, 196 TokenType.CACHE, 197 TokenType.CASCADE, 198 TokenType.COLLATE, 199 TokenType.COMMAND, 200 TokenType.COMMENT, 201 TokenType.COMMIT, 202 TokenType.COMPOUND, 203 TokenType.CONSTRAINT, 204 TokenType.CURRENT_TIME, 205 TokenType.DEFAULT, 206 TokenType.DELETE, 207 TokenType.DESCRIBE, 208 TokenType.DIV, 209 TokenType.END, 210 TokenType.EXECUTE, 211 TokenType.ESCAPE, 212 TokenType.FALSE, 213 TokenType.FIRST, 214 TokenType.FILTER, 215 TokenType.FOLLOWING, 216 TokenType.FORMAT, 217 TokenType.IF, 218 TokenType.ISNULL, 219 TokenType.INTERVAL, 220 TokenType.LAZY, 221 TokenType.LEADING, 222 TokenType.LEFT, 223 TokenType.LOCAL, 224 TokenType.MATERIALIZED, 225 TokenType.MERGE, 226 TokenType.NATURAL, 227 
TokenType.NEXT, 228 TokenType.OFFSET, 229 TokenType.ONLY, 230 TokenType.OPTIONS, 231 TokenType.ORDINALITY, 232 TokenType.PERCENT, 233 TokenType.PIVOT, 234 TokenType.PRECEDING, 235 TokenType.RANGE, 236 TokenType.REFERENCES, 237 TokenType.RIGHT, 238 TokenType.ROW, 239 TokenType.ROWS, 240 TokenType.SEED, 241 TokenType.SEMI, 242 TokenType.SET, 243 TokenType.SHOW, 244 TokenType.SORTKEY, 245 TokenType.TEMPORARY, 246 TokenType.TOP, 247 TokenType.TRAILING, 248 TokenType.TRUE, 249 TokenType.UNBOUNDED, 250 TokenType.UNIQUE, 251 TokenType.UNLOGGED, 252 TokenType.UNPIVOT, 253 TokenType.VOLATILE, 254 TokenType.WINDOW, 255 *CREATABLES, 256 *SUBQUERY_PREDICATES, 257 *TYPE_TOKENS, 258 *NO_PAREN_FUNCTIONS, 259 } 260 261 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 262 TokenType.APPLY, 263 TokenType.LEFT, 264 TokenType.NATURAL, 265 TokenType.OFFSET, 266 TokenType.RIGHT, 267 TokenType.WINDOW, 268 } 269 270 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 271 272 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 273 274 FUNC_TOKENS = { 275 TokenType.COMMAND, 276 TokenType.CURRENT_DATE, 277 TokenType.CURRENT_DATETIME, 278 TokenType.CURRENT_TIMESTAMP, 279 TokenType.CURRENT_TIME, 280 TokenType.FILTER, 281 TokenType.FIRST, 282 TokenType.FORMAT, 283 TokenType.IDENTIFIER, 284 TokenType.INDEX, 285 TokenType.ISNULL, 286 TokenType.ILIKE, 287 TokenType.LIKE, 288 TokenType.MERGE, 289 TokenType.OFFSET, 290 TokenType.PRIMARY_KEY, 291 TokenType.REPLACE, 292 TokenType.ROW, 293 TokenType.UNNEST, 294 TokenType.VAR, 295 TokenType.LEFT, 296 TokenType.RIGHT, 297 TokenType.DATE, 298 TokenType.DATETIME, 299 TokenType.TABLE, 300 TokenType.TIMESTAMP, 301 TokenType.TIMESTAMPTZ, 302 TokenType.WINDOW, 303 *TYPE_TOKENS, 304 *SUBQUERY_PREDICATES, 305 } 306 307 CONJUNCTION = { 308 TokenType.AND: exp.And, 309 TokenType.OR: exp.Or, 310 } 311 312 EQUALITY = { 313 TokenType.EQ: exp.EQ, 314 TokenType.NEQ: exp.NEQ, 315 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 316 } 317 318 COMPARISON = { 319 
TokenType.GT: exp.GT, 320 TokenType.GTE: exp.GTE, 321 TokenType.LT: exp.LT, 322 TokenType.LTE: exp.LTE, 323 } 324 325 BITWISE = { 326 TokenType.AMP: exp.BitwiseAnd, 327 TokenType.CARET: exp.BitwiseXor, 328 TokenType.PIPE: exp.BitwiseOr, 329 TokenType.DPIPE: exp.DPipe, 330 } 331 332 TERM = { 333 TokenType.DASH: exp.Sub, 334 TokenType.PLUS: exp.Add, 335 TokenType.MOD: exp.Mod, 336 TokenType.COLLATE: exp.Collate, 337 } 338 339 FACTOR = { 340 TokenType.DIV: exp.IntDiv, 341 TokenType.LR_ARROW: exp.Distance, 342 TokenType.SLASH: exp.Div, 343 TokenType.STAR: exp.Mul, 344 } 345 346 TIMESTAMPS = { 347 TokenType.TIME, 348 TokenType.TIMESTAMP, 349 TokenType.TIMESTAMPTZ, 350 TokenType.TIMESTAMPLTZ, 351 } 352 353 SET_OPERATIONS = { 354 TokenType.UNION, 355 TokenType.INTERSECT, 356 TokenType.EXCEPT, 357 } 358 359 JOIN_SIDES = { 360 TokenType.LEFT, 361 TokenType.RIGHT, 362 TokenType.FULL, 363 } 364 365 JOIN_KINDS = { 366 TokenType.INNER, 367 TokenType.OUTER, 368 TokenType.CROSS, 369 TokenType.SEMI, 370 TokenType.ANTI, 371 } 372 373 LAMBDAS = { 374 TokenType.ARROW: lambda self, expressions: self.expression( 375 exp.Lambda, 376 this=self._parse_conjunction().transform( 377 self._replace_lambda, {node.name for node in expressions} 378 ), 379 expressions=expressions, 380 ), 381 TokenType.FARROW: lambda self, expressions: self.expression( 382 exp.Kwarg, 383 this=exp.Var(this=expressions[0].name), 384 expression=self._parse_conjunction(), 385 ), 386 } 387 388 COLUMN_OPERATORS = { 389 TokenType.DOT: None, 390 TokenType.DCOLON: lambda self, this, to: self.expression( 391 exp.Cast, 392 this=this, 393 to=to, 394 ), 395 TokenType.ARROW: lambda self, this, path: self.expression( 396 exp.JSONExtract, 397 this=this, 398 expression=path, 399 ), 400 TokenType.DARROW: lambda self, this, path: self.expression( 401 exp.JSONExtractScalar, 402 this=this, 403 expression=path, 404 ), 405 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 406 exp.JSONBExtract, 407 this=this, 408 
expression=path, 409 ), 410 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 411 exp.JSONBExtractScalar, 412 this=this, 413 expression=path, 414 ), 415 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 416 exp.JSONBContains, 417 this=this, 418 expression=key, 419 ), 420 } 421 422 EXPRESSION_PARSERS = { 423 exp.Column: lambda self: self._parse_column(), 424 exp.DataType: lambda self: self._parse_types(), 425 exp.From: lambda self: self._parse_from(), 426 exp.Group: lambda self: self._parse_group(), 427 exp.Identifier: lambda self: self._parse_id_var(), 428 exp.Lateral: lambda self: self._parse_lateral(), 429 exp.Join: lambda self: self._parse_join(), 430 exp.Order: lambda self: self._parse_order(), 431 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 432 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 433 exp.Lambda: lambda self: self._parse_lambda(), 434 exp.Limit: lambda self: self._parse_limit(), 435 exp.Offset: lambda self: self._parse_offset(), 436 exp.TableAlias: lambda self: self._parse_table_alias(), 437 exp.Table: lambda self: self._parse_table(), 438 exp.Condition: lambda self: self._parse_conjunction(), 439 exp.Expression: lambda self: self._parse_statement(), 440 exp.Properties: lambda self: self._parse_properties(), 441 exp.Where: lambda self: self._parse_where(), 442 exp.Ordered: lambda self: self._parse_ordered(), 443 exp.Having: lambda self: self._parse_having(), 444 exp.With: lambda self: self._parse_with(), 445 exp.Window: lambda self: self._parse_named_window(), 446 exp.Qualify: lambda self: self._parse_qualify(), 447 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 448 } 449 450 STATEMENT_PARSERS = { 451 TokenType.ALTER: lambda self: self._parse_alter(), 452 TokenType.BEGIN: lambda self: self._parse_transaction(), 453 TokenType.CACHE: lambda self: self._parse_cache(), 454 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 455 
TokenType.COMMENT: lambda self: self._parse_comment(), 456 TokenType.CREATE: lambda self: self._parse_create(), 457 TokenType.DELETE: lambda self: self._parse_delete(), 458 TokenType.DESC: lambda self: self._parse_describe(), 459 TokenType.DESCRIBE: lambda self: self._parse_describe(), 460 TokenType.DROP: lambda self: self._parse_drop(), 461 TokenType.END: lambda self: self._parse_commit_or_rollback(), 462 TokenType.INSERT: lambda self: self._parse_insert(), 463 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 464 TokenType.MERGE: lambda self: self._parse_merge(), 465 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 466 TokenType.UNCACHE: lambda self: self._parse_uncache(), 467 TokenType.UPDATE: lambda self: self._parse_update(), 468 TokenType.USE: lambda self: self.expression( 469 exp.Use, 470 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 471 and exp.Var(this=self._prev.text), 472 this=self._parse_table(schema=False), 473 ), 474 } 475 476 UNARY_PARSERS = { 477 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 478 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 479 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 480 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 481 } 482 483 PRIMARY_PARSERS = { 484 TokenType.STRING: lambda self, token: self.expression( 485 exp.Literal, this=token.text, is_string=True 486 ), 487 TokenType.NUMBER: lambda self, token: self.expression( 488 exp.Literal, this=token.text, is_string=False 489 ), 490 TokenType.STAR: lambda self, _: self.expression( 491 exp.Star, 492 **{"except": self._parse_except(), "replace": self._parse_replace()}, 493 ), 494 TokenType.NULL: lambda self, _: self.expression(exp.Null), 495 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 496 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, 
this=False), 497 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 498 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 499 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 500 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 501 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 502 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 503 } 504 505 PLACEHOLDER_PARSERS = { 506 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 507 TokenType.PARAMETER: lambda self: self._parse_parameter(), 508 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 509 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 510 else None, 511 } 512 513 RANGE_PARSERS = { 514 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 515 TokenType.GLOB: binary_range_parser(exp.Glob), 516 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 517 TokenType.IN: lambda self, this: self._parse_in(this), 518 TokenType.IS: lambda self, this: self._parse_is(this), 519 TokenType.LIKE: binary_range_parser(exp.Like), 520 TokenType.ILIKE: binary_range_parser(exp.ILike), 521 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 522 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 523 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 524 } 525 526 PROPERTY_PARSERS = { 527 "AFTER": lambda self: self._parse_afterjournal( 528 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 529 ), 530 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 531 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 532 "BEFORE": lambda self: self._parse_journal( 533 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 534 ), 535 "BLOCKCOMPRESSION": 
lambda self: self._parse_blockcompression(), 536 "CHARACTER SET": lambda self: self._parse_character_set(), 537 "CHECKSUM": lambda self: self._parse_checksum(), 538 "CLUSTER BY": lambda self: self.expression( 539 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 540 ), 541 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 542 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 543 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 544 default=self._prev.text.upper() == "DEFAULT" 545 ), 546 "DEFINER": lambda self: self._parse_definer(), 547 "DETERMINISTIC": lambda self: self.expression( 548 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 549 ), 550 "DISTKEY": lambda self: self._parse_distkey(), 551 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 552 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 553 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 554 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 555 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 556 "FREESPACE": lambda self: self._parse_freespace(), 557 "GLOBAL": lambda self: self._parse_temporary(global_=True), 558 "IMMUTABLE": lambda self: self.expression( 559 exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") 560 ), 561 "JOURNAL": lambda self: self._parse_journal( 562 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 563 ), 564 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 565 "LIKE": lambda self: self._parse_create_like(), 566 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 567 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 568 "LOCK": lambda self: self._parse_locking(), 569 "LOCKING": lambda self: self._parse_locking(), 570 "LOG": lambda 
self: self._parse_log(no=self._prev.text.upper() == "NO"), 571 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 572 "MAX": lambda self: self._parse_datablocksize(), 573 "MAXIMUM": lambda self: self._parse_datablocksize(), 574 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 575 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 576 ), 577 "MIN": lambda self: self._parse_datablocksize(), 578 "MINIMUM": lambda self: self._parse_datablocksize(), 579 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 580 "NO": lambda self: self._parse_noprimaryindex(), 581 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 582 "ON": lambda self: self._parse_oncommit(), 583 "PARTITION BY": lambda self: self._parse_partitioned_by(), 584 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 585 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 586 "RETURNS": lambda self: self._parse_returns(), 587 "ROW": lambda self: self._parse_row(), 588 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 589 "SORTKEY": lambda self: self._parse_sortkey(), 590 "STABLE": lambda self: self.expression( 591 exp.VolatilityProperty, this=exp.Literal.string("STABLE") 592 ), 593 "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 594 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 595 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 596 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 597 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 598 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 599 "VOLATILE": lambda self: self.expression( 600 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 601 ), 602 "WITH": lambda self: self._parse_with_property(), 603 } 604 605 CONSTRAINT_PARSERS = { 606 
"AUTOINCREMENT": lambda self: self._parse_auto_increment(), 607 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 608 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 609 "CHARACTER SET": lambda self: self.expression( 610 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 611 ), 612 "CHECK": lambda self: self.expression( 613 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 614 ), 615 "COLLATE": lambda self: self.expression( 616 exp.CollateColumnConstraint, this=self._parse_var() 617 ), 618 "COMMENT": lambda self: self.expression( 619 exp.CommentColumnConstraint, this=self._parse_string() 620 ), 621 "COMPRESS": lambda self: self._parse_compress(), 622 "DEFAULT": lambda self: self.expression( 623 exp.DefaultColumnConstraint, this=self._parse_bitwise() 624 ), 625 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 626 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 627 "FORMAT": lambda self: self.expression( 628 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 629 ), 630 "GENERATED": lambda self: self._parse_generated_as_identity(), 631 "IDENTITY": lambda self: self._parse_auto_increment(), 632 "INLINE": lambda self: self._parse_inline(), 633 "LIKE": lambda self: self._parse_create_like(), 634 "NOT": lambda self: self._parse_not_constraint(), 635 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 636 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 637 "PRIMARY KEY": lambda self: self._parse_primary_key(), 638 "TITLE": lambda self: self.expression( 639 exp.TitleColumnConstraint, this=self._parse_var_or_string() 640 ), 641 "UNIQUE": lambda self: self._parse_unique(), 642 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 643 } 644 645 ALTER_PARSERS = { 646 "ADD": lambda self: self._parse_alter_table_add(), 647 
"ALTER": lambda self: self._parse_alter_table_alter(), 648 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 649 "DROP": lambda self: self._parse_alter_table_drop(), 650 "RENAME": lambda self: self._parse_alter_table_rename(), 651 } 652 653 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 654 655 NO_PAREN_FUNCTION_PARSERS = { 656 TokenType.CASE: lambda self: self._parse_case(), 657 TokenType.IF: lambda self: self._parse_if(), 658 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 659 } 660 661 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 662 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 663 "TRY_CONVERT": lambda self: self._parse_convert(False), 664 "EXTRACT": lambda self: self._parse_extract(), 665 "POSITION": lambda self: self._parse_position(), 666 "SUBSTRING": lambda self: self._parse_substring(), 667 "TRIM": lambda self: self._parse_trim(), 668 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 669 "TRY_CAST": lambda self: self._parse_cast(False), 670 "STRING_AGG": lambda self: self._parse_string_agg(), 671 } 672 673 QUERY_MODIFIER_PARSERS = { 674 "match": lambda self: self._parse_match_recognize(), 675 "where": lambda self: self._parse_where(), 676 "group": lambda self: self._parse_group(), 677 "having": lambda self: self._parse_having(), 678 "qualify": lambda self: self._parse_qualify(), 679 "windows": lambda self: self._parse_window_clause(), 680 "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute), 681 "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 682 "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 683 "order": lambda self: self._parse_order(), 684 "limit": lambda self: self._parse_limit(), 685 "offset": lambda self: self._parse_offset(), 686 "lock": lambda self: self._parse_lock(), 687 "sample": lambda self: self._parse_table_sample(as_modifier=True), 
688 } 689 690 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 691 SET_PARSERS: t.Dict[str, t.Callable] = {} 692 693 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 694 695 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 696 697 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 698 699 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 700 701 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 702 703 STRICT_CAST = True 704 705 INTEGER_DIVISION = True 706 707 CONVERT_TYPE_FIRST = False 708 709 __slots__ = ( 710 "error_level", 711 "error_message_context", 712 "sql", 713 "errors", 714 "index_offset", 715 "unnest_column_only", 716 "alias_post_tablesample", 717 "max_errors", 718 "null_ordering", 719 "_tokens", 720 "_index", 721 "_curr", 722 "_next", 723 "_prev", 724 "_prev_comments", 725 "_show_trie", 726 "_set_trie", 727 ) 728 729 def __init__( 730 self, 731 error_level: t.Optional[ErrorLevel] = None, 732 error_message_context: int = 100, 733 index_offset: int = 0, 734 unnest_column_only: bool = False, 735 alias_post_tablesample: bool = False, 736 max_errors: int = 3, 737 null_ordering: t.Optional[str] = None, 738 ): 739 self.error_level = error_level or ErrorLevel.IMMEDIATE 740 self.error_message_context = error_message_context 741 self.index_offset = index_offset 742 self.unnest_column_only = unnest_column_only 743 self.alias_post_tablesample = alias_post_tablesample 744 self.max_errors = max_errors 745 self.null_ordering = null_ordering 746 self.reset() 747 748 def reset(self): 749 self.sql = "" 750 self.errors = [] 751 self._tokens = [] 752 self._index = 0 753 self._curr = None 754 self._next = None 755 self._prev = None 756 self._prev_comments = None 757 758 def parse( 759 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 760 ) -> t.List[t.Optional[exp.Expression]]: 761 """ 762 Parses a list of tokens and returns a list of syntax trees, one tree 763 per parsed SQL 
statement. 764 765 Args: 766 raw_tokens: the list of tokens. 767 sql: the original SQL string, used to produce helpful debug messages. 768 769 Returns: 770 The list of syntax trees. 771 """ 772 return self._parse( 773 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 774 ) 775 776 def parse_into( 777 self, 778 expression_types: exp.IntoType, 779 raw_tokens: t.List[Token], 780 sql: t.Optional[str] = None, 781 ) -> t.List[t.Optional[exp.Expression]]: 782 """ 783 Parses a list of tokens into a given Expression type. If a collection of Expression 784 types is given instead, this method will try to parse the token list into each one 785 of them, stopping at the first for which the parsing succeeds. 786 787 Args: 788 expression_types: the expression type(s) to try and parse the token list into. 789 raw_tokens: the list of tokens. 790 sql: the original SQL string, used to produce helpful debug messages. 791 792 Returns: 793 The target Expression. 794 """ 795 errors = [] 796 for expression_type in ensure_collection(expression_types): 797 parser = self.EXPRESSION_PARSERS.get(expression_type) 798 if not parser: 799 raise TypeError(f"No parser registered for {expression_type}") 800 try: 801 return self._parse(parser, raw_tokens, sql) 802 except ParseError as e: 803 e.errors[0]["into_expression"] = expression_type 804 errors.append(e) 805 raise ParseError( 806 f"Failed to parse into {expression_types}", 807 errors=merge_errors(errors), 808 ) from errors[-1] 809 810 def _parse( 811 self, 812 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 813 raw_tokens: t.List[Token], 814 sql: t.Optional[str] = None, 815 ) -> t.List[t.Optional[exp.Expression]]: 816 self.reset() 817 self.sql = sql or "" 818 total = len(raw_tokens) 819 chunks: t.List[t.List[Token]] = [[]] 820 821 for i, token in enumerate(raw_tokens): 822 if token.token_type == TokenType.SEMICOLON: 823 if i < total - 1: 824 chunks.append([]) 825 else: 826 chunks[-1].append(token) 827 
828 expressions = [] 829 830 for tokens in chunks: 831 self._index = -1 832 self._tokens = tokens 833 self._advance() 834 835 expressions.append(parse_method(self)) 836 837 if self._index < len(self._tokens): 838 self.raise_error("Invalid expression / Unexpected token") 839 840 self.check_errors() 841 842 return expressions 843 844 def check_errors(self) -> None: 845 """ 846 Logs or raises any found errors, depending on the chosen error level setting. 847 """ 848 if self.error_level == ErrorLevel.WARN: 849 for error in self.errors: 850 logger.error(str(error)) 851 elif self.error_level == ErrorLevel.RAISE and self.errors: 852 raise ParseError( 853 concat_messages(self.errors, self.max_errors), 854 errors=merge_errors(self.errors), 855 ) 856 857 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 858 """ 859 Appends an error in the list of recorded errors or raises it, depending on the chosen 860 error level setting. 861 """ 862 token = token or self._curr or self._prev or Token.string("") 863 start = self._find_token(token) 864 end = start + len(token.text) 865 start_context = self.sql[max(start - self.error_message_context, 0) : start] 866 highlight = self.sql[start:end] 867 end_context = self.sql[end : end + self.error_message_context] 868 869 error = ParseError.new( 870 f"{message}. Line {token.line}, Col: {token.col}.\n" 871 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 872 description=message, 873 line=token.line, 874 col=token.col, 875 start_context=start_context, 876 highlight=highlight, 877 end_context=end_context, 878 ) 879 880 if self.error_level == ErrorLevel.IMMEDIATE: 881 raise error 882 883 self.errors.append(error) 884 885 def expression( 886 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 887 ) -> exp.Expression: 888 """ 889 Creates a new, validated Expression. 890 891 Args: 892 exp_class: the expression class to instantiate. 
893 comments: an optional list of comments to attach to the expression. 894 kwargs: the arguments to set for the expression along with their respective values. 895 896 Returns: 897 The target expression. 898 """ 899 instance = exp_class(**kwargs) 900 if self._prev_comments: 901 instance.comments = self._prev_comments 902 self._prev_comments = None 903 if comments: 904 instance.comments = comments 905 self.validate_expression(instance) 906 return instance 907 908 def validate_expression( 909 self, expression: exp.Expression, args: t.Optional[t.List] = None 910 ) -> None: 911 """ 912 Validates an already instantiated expression, making sure that all its mandatory arguments 913 are set. 914 915 Args: 916 expression: the expression to validate. 917 args: an optional list of items that was used to instantiate the expression, if it's a Func. 918 """ 919 if self.error_level == ErrorLevel.IGNORE: 920 return 921 922 for error_message in expression.error_messages(args): 923 self.raise_error(error_message) 924 925 def _find_sql(self, start: Token, end: Token) -> str: 926 return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)] 927 928 def _find_token(self, token: Token) -> int: 929 line = 1 930 col = 1 931 index = 0 932 933 while line < token.line or col < token.col: 934 if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK: 935 line += 1 936 col = 1 937 else: 938 col += 1 939 index += 1 940 941 return index 942 943 def _advance(self, times: int = 1) -> None: 944 self._index += times 945 self._curr = seq_get(self._tokens, self._index) 946 self._next = seq_get(self._tokens, self._index + 1) 947 if self._index > 0: 948 self._prev = self._tokens[self._index - 1] 949 self._prev_comments = self._prev.comments 950 else: 951 self._prev = None 952 self._prev_comments = None 953 954 def _retreat(self, index: int) -> None: 955 if index != self._index: 956 self._advance(index - self._index) 957 958 def _parse_command(self) -> exp.Expression: 959 return 
self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 960 961 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 962 start = self._prev 963 exists = self._parse_exists() if allow_exists else None 964 965 self._match(TokenType.ON) 966 967 kind = self._match_set(self.CREATABLES) and self._prev 968 969 if not kind: 970 return self._parse_as_command(start) 971 972 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 973 this = self._parse_user_defined_function(kind=kind.token_type) 974 elif kind.token_type == TokenType.TABLE: 975 this = self._parse_table() 976 elif kind.token_type == TokenType.COLUMN: 977 this = self._parse_column() 978 else: 979 this = self._parse_id_var() 980 981 self._match(TokenType.IS) 982 983 return self.expression( 984 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 985 ) 986 987 def _parse_statement(self) -> t.Optional[exp.Expression]: 988 if self._curr is None: 989 return None 990 991 if self._match_set(self.STATEMENT_PARSERS): 992 return self.STATEMENT_PARSERS[self._prev.token_type](self) 993 994 if self._match_set(Tokenizer.COMMANDS): 995 return self._parse_command() 996 997 expression = self._parse_expression() 998 expression = self._parse_set_operations(expression) if expression else self._parse_select() 999 1000 self._parse_query_modifiers(expression) 1001 return expression 1002 1003 def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]: 1004 start = self._prev 1005 temporary = self._match(TokenType.TEMPORARY) 1006 materialized = self._match(TokenType.MATERIALIZED) 1007 kind = self._match_set(self.CREATABLES) and self._prev.text 1008 if not kind: 1009 if default_kind: 1010 kind = default_kind 1011 else: 1012 return self._parse_as_command(start) 1013 1014 return self.expression( 1015 exp.Drop, 1016 exists=self._parse_exists(), 1017 this=self._parse_table(schema=True), 1018 kind=kind, 1019 
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the whole sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parses a CREATE statement into an exp.Create node, or degrades to a raw
        Command when the created object's kind cannot be determined.

        Properties may appear at several syntactic locations; each location is parsed
        separately and the results are merged into a single exp.Properties node.
        """
        start = self._prev
        # CREATE OR REPLACE - the REPLACE may already have been consumed by the caller.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)
        volatile = self._match(TokenType.VOLATILE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION - consume TABLE so FUNCTION becomes the create token.
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            volatile=volatile,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): _curr is assumed non-None here - TODO confirm callers guarantee it.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Tries, in order: keyword-dispatched property parsers, special two-token forms,
        # and finally a generic `key = value` assignment.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Generic `[=|AS] <value>` property body; value may be a var, string, number or id.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(
            exp_class,
            this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        # Collects consecutive properties until none match; returns None if there were none.
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_collection(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        # WITH (...) | WITH JOURNAL | WITH [NO] DATA | WITH ... ISOLATED LOADING
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        # CHECKSUM = ON | OFF | DEFAULT; `on` stays None when neither ON nor OFF matched.
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        # [DEFAULT | MINIMUM | MAXIMUM] DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]]
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
    def _parse_locking(self) -> exp.Expression:
        # Teradata LOCKING modifier: LOCKING <kind> [<name>] [FOR|IN] <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns [] (not None) when there is no PARTITION BY clause.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        # WITH [NO] DATA [AND [NO] STATISTICS]; statistics stays None when absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        # LIKE <table> [INCLUDING|EXCLUDING <option>]...
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        # RETURNS <type> | RETURNS TABLE [<...>] | RETURNS TABLE (schema)
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        # INSERT [OVERWRITE] [LOCAL] [DIRECTORY <path> | [OR <alt>] [INTO] [TABLE] <table>] ...
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT ... DIRECTORY writes to a path rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        # Hive ROW FORMAT SERDE '<...>' | ROW FORMAT DELIMITED [clauses...]
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        # Hive LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table> ...
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        # Spark CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        # One row of a VALUES list; either a parenthesized tuple or a single expression.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parses a SELECT statement, a WITH ... statement, a parenthesized subquery
        (when `nested` or `table` is set), or a VALUES clause. Returns None when
        none of these match.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas; a stray WITH between them is also tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        # <alias> [(<columns>)] AS (<statement>)
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If nothing parsed inside the parens, the paren wasn't a column list - rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        # Attaches laterals/joins and the clause modifiers (WHERE, GROUP BY, ...) in place.
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                # Comma-separated tables extend the FROM clause.
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        # Oracle-style /*+ ... */ optimizer hints.
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None
        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_alias(self._parse_conjunction())
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text between balanced parentheses,
            # not parsed into an expression tree.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None
        )
        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
        )

    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        # LATERAL [VIEW] ... or SQL Server OUTER/CROSS APPLY (which becomes a Join).
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery - a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        if outer_apply or cross_apply:
            # APPLY is modeled as a join: OUTER APPLY -> LEFT join, CROSS APPLY -> plain join.
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # (NATURAL, side e.g. LEFT/RIGHT, kind e.g. INNER/OUTER) - each may be None.
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        # <index name> ON [TABLE] <table> <columns expression>
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        # [UNIQUE] [PRIMARY] [AMP] INDEX <name> [(columns)] inside CREATE TABLE.
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        # Parses [catalog.][db.]table, nesting extra qualifiers as Dot expressions.
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # A "table" may actually be a lateral, UNNEST, derived VALUES, subquery, or a
        # plain (possibly aliased/sampled/pivoted/hinted) table reference - tried in that order.
        lateral = self._parse_lateral()

        if lateral:
            return lateral

        unnest = self._parse_unnest()

        if unnest:
            return unnest

        values = self._parse_derived_table_values()

        if values:
            return values

        subquery = self._parse_select(table=True)

        if subquery:
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag controls whether TABLESAMPLE precedes or follows the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)

        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        if not
self._match(TokenType.UNNEST): 2129 return None 2130 2131 expressions = self._parse_wrapped_csv(self._parse_column) 2132 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2133 alias = self._parse_table_alias() 2134 2135 if alias and self.unnest_column_only: 2136 if alias.args.get("columns"): 2137 self.raise_error("Unexpected extra column alias in unnest.") 2138 alias.set("columns", [alias.this]) 2139 alias.set("this", None) 2140 2141 offset = None 2142 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2143 self._match(TokenType.ALIAS) 2144 offset = self._parse_conjunction() 2145 2146 return self.expression( 2147 exp.Unnest, 2148 expressions=expressions, 2149 ordinality=ordinality, 2150 alias=alias, 2151 offset=offset, 2152 ) 2153 2154 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2155 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2156 if not is_derived and not self._match(TokenType.VALUES): 2157 return None 2158 2159 expressions = self._parse_csv(self._parse_value) 2160 2161 if is_derived: 2162 self._match_r_paren() 2163 2164 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2165 2166 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2167 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2168 as_modifier and self._match_text_seq("USING", "SAMPLE") 2169 ): 2170 return None 2171 2172 bucket_numerator = None 2173 bucket_denominator = None 2174 bucket_field = None 2175 percent = None 2176 rows = None 2177 size = None 2178 seed = None 2179 2180 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2181 method = self._parse_var(tokens=(TokenType.ROW,)) 2182 2183 self._match(TokenType.L_PAREN) 2184 2185 num = self._parse_number() 2186 2187 if self._match(TokenType.BUCKET): 2188 bucket_numerator = self._parse_number() 2189 self._match(TokenType.OUT_OF) 2190 
bucket_denominator = bucket_denominator = self._parse_number() 2191 self._match(TokenType.ON) 2192 bucket_field = self._parse_field() 2193 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2194 percent = num 2195 elif self._match(TokenType.ROWS): 2196 rows = num 2197 else: 2198 size = num 2199 2200 self._match(TokenType.R_PAREN) 2201 2202 if self._match(TokenType.L_PAREN): 2203 method = self._parse_var() 2204 seed = self._match(TokenType.COMMA) and self._parse_number() 2205 self._match_r_paren() 2206 elif self._match_texts(("SEED", "REPEATABLE")): 2207 seed = self._parse_wrapped(self._parse_number) 2208 2209 return self.expression( 2210 exp.TableSample, 2211 method=method, 2212 bucket_numerator=bucket_numerator, 2213 bucket_denominator=bucket_denominator, 2214 bucket_field=bucket_field, 2215 percent=percent, 2216 rows=rows, 2217 size=size, 2218 seed=seed, 2219 kind=kind, 2220 ) 2221 2222 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2223 return list(iter(self._parse_pivot, None)) 2224 2225 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2226 index = self._index 2227 2228 if self._match(TokenType.PIVOT): 2229 unpivot = False 2230 elif self._match(TokenType.UNPIVOT): 2231 unpivot = True 2232 else: 2233 return None 2234 2235 expressions = [] 2236 field = None 2237 2238 if not self._match(TokenType.L_PAREN): 2239 self._retreat(index) 2240 return None 2241 2242 if unpivot: 2243 expressions = self._parse_csv(self._parse_column) 2244 else: 2245 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2246 2247 if not self._match(TokenType.FOR): 2248 self.raise_error("Expecting FOR") 2249 2250 value = self._parse_column() 2251 2252 if not self._match(TokenType.IN): 2253 self.raise_error("Expecting IN") 2254 2255 field = self._parse_in(value) 2256 2257 self._match_r_paren() 2258 2259 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2260 2261 if not 
self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        return pivot

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; preserves comments attached to the WHERE token."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse GROUP BY, collecting expressions, GROUPING SETS, ROLLUP and CUBE."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop because the grouping elements can appear repeatedly and in any order.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # "WITH ROLLUP" takes no column list; plain ROLLUP does.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if not (expressions or grouping_sets or rollup or cube):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set is either a parenthesized tuple of columns or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY; returns `this` unchanged when there is no ORDER BY."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Generic helper for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term, normalizing NULLS ordering via `self.null_ordering`."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # When the query did not order nulls explicitly, infer the effective
        # ordering from the dialect's null_ordering setting.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    )
-> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # Some dialects allow LIMIT(n) with parentheses.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # ANSI FETCH {FIRST|NEXT} n {ROW|ROWS} ONLY
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"
            count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            self._match(TokenType.ONLY)
            return self.expression(exp.Fetch, direction=direction, count=count)

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET (the COMMA token covers MySQL's "LIMIT x, y" form)."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE locking clauses."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains (right-associative via recursion)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/LIKE/IN/...), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            this = self.RANGE_PARSERS[self._prev.token_type](self, this)
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the tail of an IS predicate (after IS has been consumed)."""
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS [NOT] DISTINCT FROM maps to null-safe (in)equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        this = self.expression(
            exp.Is,
            this=this,
            expression=self._parse_null() or self._parse_boolean(),
        )
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IN: UNNEST, a subquery/list, or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subquery is stored as "query", otherwise a plain value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren()
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, building << and >> from adjacent LT/GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Disambiguate a type literal / cast from a plain column reference."""
        if self._match(TokenType.INTERVAL):
            return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field())

        index = self._index
        type_token = self._parse_types(check_func=True)
        this = self._parse_column()

        if type_token:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' parses as a cast of the literal.
                return self.expression(exp.Cast, this=this, to=type_token)
            if not type_token.args.get("expressions"):
                # A bare type name followed by a column was a column after all — rewind.
                self._retreat(index)
                return self._parse_column()
            return type_token

        return this

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, backtracking (via `_retreat`) when the tokens turn out
        not to form one.

        When `check_func` is True, a type name followed by "(...)" that is not
        clearly a type (no trailing string literal) is rejected so it can be
        re-parsed as a function call.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_conjunction)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()
            maybe_func = True

        # Postgres-style array types: TYPE[] (possibly multi-dimensional).
        if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was a bracket/index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic syntax for nested types: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants to concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal: treat as a function call instead.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]:
        """Parse a STRUCT field: either a bare type or "name: type"."""
        if self._curr and self._curr.token_type in self.TYPE_TOKENS:
            return self._parse_types()

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        data_type = self._parse_types()

        if not data_type:
            return None
        return self.expression(exp.StructKwarg, this=this, expression=data_type)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including dots, ::casts and bracket access."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifier chain: table -> db, db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, parenthesized expression/subquery or tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate into a single Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # A leading-dot decimal like ".5" becomes 0.5.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
                )

            this = seq_get(expressions, 0)
            self._parse_query_modifiers(this)
            self._match_r_paren()

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this and comments:
                this.comments = comments

            return this

        return None

    def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to registered parsers/builders.

        Args:
            functions: optional override of the name -> builder mapping
                (defaults to `self.FUNCTIONS`).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parentheses follow: only parenless builtins like CURRENT_DATE qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function:
                # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
                # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
                if count_params(function) == 2:
                    params = None
                    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                        params = self._parse_csv(self._parse_lambda)

                    this = function(args, params)
                else:
                    this = function(args)

                self.validate_expression(this, args)
            else:
                # Unknown function name: preserve it as an anonymous call.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: a (possibly dotted) name plus optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a character-set introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as "kind.name"."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. "x -> ..."), else a DISTINCT/select/expression
        argument with optional IGNORE/RESPECT NULLS and ORDER BY/LIMIT modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_limit(self._parse_order(this))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list attached to `this` (e.g. CREATE TABLE)."""
        index = self._index
        if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
            self._retreat(index)
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type followed by zero or more constraints."""
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either a value list or single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS|BY DEFAULT} AS IDENTITY with its sequence options."""
        if self._match(TokenType.BY_DEFAULT):
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        self._match_text_seq("LENGTH")
        return
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3009 3010 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3011 if self._match_text_seq("NULL"): 3012 return self.expression(exp.NotNullColumnConstraint) 3013 if self._match_text_seq("CASESPECIFIC"): 3014 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3015 return None 3016 3017 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3018 this = self._parse_references() 3019 if this: 3020 return this 3021 3022 if self._match(TokenType.CONSTRAINT): 3023 this = self._parse_id_var() 3024 3025 if self._match_texts(self.CONSTRAINT_PARSERS): 3026 return self.expression( 3027 exp.ColumnConstraint, 3028 this=this, 3029 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3030 ) 3031 3032 return this 3033 3034 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3035 if not self._match(TokenType.CONSTRAINT): 3036 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3037 3038 this = self._parse_id_var() 3039 expressions = [] 3040 3041 while True: 3042 constraint = self._parse_unnamed_constraint() or self._parse_function() 3043 if not constraint: 3044 break 3045 expressions.append(constraint) 3046 3047 return self.expression(exp.Constraint, this=this, expressions=expressions) 3048 3049 def _parse_unnamed_constraint( 3050 self, constraints: t.Optional[t.Collection[str]] = None 3051 ) -> t.Optional[exp.Expression]: 3052 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3053 return None 3054 3055 constraint = self._prev.text.upper() 3056 if constraint not in self.CONSTRAINT_PARSERS: 3057 self.raise_error(f"No parser found for schema constraint {constraint}.") 3058 3059 return self.CONSTRAINT_PARSERS[constraint](self) 3060 3061 def _parse_unique(self) -> exp.Expression: 3062 if not self._match(TokenType.L_PAREN, advance=False): 3063 return self.expression(exp.UniqueColumnConstraint) 3064 return 
        self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE, etc.) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event name (e.g. DELETE/UPDATE) is whatever token follows ON.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table> [(<columns>)] [<key options>]; None if REFERENCES is absent."""
        if not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (<columns>) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" / "update" become keyword args of exp.ForeignKey below.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single-token action (e.g. CASCADE, RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY either as a column constraint or as PRIMARY KEY (<columns>)."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_id_vars()
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:n]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's array index offset.
            expressions = apply_index_offset(expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        this.comments = self._prev_comments
        # Recurse to consume chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a colon follows (e.g. x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: IF(cond, true, false) and IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        """Parse EXTRACT(<part> FROM <expr>) — a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if
        self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(<expr> AS <type>); `strict` chooses exp.Cast vs exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR CHARACTER SET <charset> — replace the target with the charset.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG, normalizing Postgres/T-SQL variants into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<a>, <b>) into a Cast/TryCast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        # (some dialects put the target type first).
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION(<substr> IN <string>) or the comma-separated function form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # Comma form: argument order depends on the dialect.
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint call's table list into an exp.JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING, including the Postgres FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([LEADING|TRAILING|BOTH] [<chars> FROM] <expr> [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_term()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: the first term was the trim characters.
            this = self._parse_term()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_term()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a trailing WINDOW clause (comma-separated named windows), if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named-window definition: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / IGNORE-RESPECT NULLS / OVER modifiers after `this`."""
        if self._match(TokenType.FILTER):
            where = self._parse_wrapped(self._parse_where)
            this = self.expression(exp.Filter, this=this, expression=where)

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        elif self._match(TokenType.RESPECT_NULLS):
            this = self.expression(exp.RespectNulls, this=this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            # No OVER clause: `this` is not a window expression at all.
            return this

        if not self._match(TokenType.L_PAREN):
            # OVER <name> — a reference to a named window.
            return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS/RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED/CURRENT ROW/<expr> plus PRECEDING/FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias after `this`; with `explicit`, only an AS-prefixed alias counts."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: <expr> AS (a, b, c)
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token into an exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            # Glue any allowed prefix tokens onto the identifier text.
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to placeholder syntax."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to placeholder syntax."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to placeholder syntax."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`) into an exp.Var; placeholder fallback."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return
        self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a VAR token or a string literal, whichever comes first."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal via the registered primary parser."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal via the registered primary parsers."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a * token via the registered primary parser."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally brace-wrapped (e.g. ${name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind one token on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The parser declined: undo the token we consumed above.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (<columns>) / EXCEPT <columns> projection modifier."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (<exprs>) / REPLACE <exprs> projection modifier."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments that preceded the separator to the prior item.
            if parse_result and self._prev_comments:
                parse_result.comments = self._prev_comments

            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators: parse operands with `parse_method`, mapping operator tokens via `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep))

    def _parse_wrapped(self, parse_method: t.Callable) -> t.Any:
        """Run `parse_method` between a required ( and )."""
        self._match_l_paren()
        parse_result = parse_method()
        self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a plain expression."""
        return self._parse_select() or self._parse_expression()

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS ...)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START [<kind>] [TRANSACTION|WORK] [<mode>, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Each mode is a run of VAR tokens; modes are comma-separated.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT <id>] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): `chain` is only carried on Commit, not Rollback — confirm
        # that is intentional for the supported dialects.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... DROP [COLUMN] action."""
        return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse the partition list of an ALTER TABLE ... DROP PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD <constraint> action (CHECK / FOREIGN KEY / PRIMARY KEY)."""
        this = None
        # The caller already consumed the leading token; inspect it here.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # NOTE(review): if none of CHECK / FOREIGN KEY / PRIMARY KEY applies below,
        # `expression` is unbound and this raises NameError — confirm the caller
        # guarantees one of these tokens.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the actions of ALTER TABLE ... ADD: constraints if possible, else columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint — rewind and parse column additions instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ... DROP actions: partitions if present, else columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return
        self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER statement; non-TABLE or unrecognized forms become raw Commands."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # Dispatch on the action keyword (ADD/ALTER/DROP/RENAME...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=ensure_list(parser(self)),
            )
        return self._parse_as_command(start)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers (trie lookup)."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _default_parse_set_item(self) -> exp.Expression:
        """Fallback SET item parser: wrap whatever statement follows."""
        return self.expression(
            exp.SetItem,
            this=self._parse_statement(),
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the registered SET parsers, else the default."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._default_parse_set_item()

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # T-SQL: WHEN NOT MATCHED BY TARGET / BY SOURCE.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement as a comma-separated list of SET items."""
        return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as an opaque exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        # Split the raw SQL into the leading keyword and everything after it.
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` with upcoming token words to find a multi-word parser; rewind on miss."""
        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end in the trie: no parser matches this prefix.
                break
            if result == 2:
                # Full key matched: dispatch to the registered parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type: TokenType, advance: bool = True) -> t.Optional[bool]:
        """Return True (and advance, by default) if the current token is `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            return True

        return None

    def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> t.Optional[bool]:
        """Return True (and advance, by default) if the current token is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (and advance past both, by default) if the next two tokens match a then b."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ( token; attach any preceding comments to `expression`."""
        if not self._match(TokenType.L_PAREN):
            self.raise_error("Expecting (")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ) token; attach any preceding comments to `expression`."""
        if not self._match(TokenType.R_PAREN):
            self.raise_error("Expecting )")
        if expression and self._prev_comments:
            expression.comments = self._prev_comments

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True (and advance, by default) if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Match a sequence of upper-cased token texts; rewind fully when any word misses."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            # Peek-only mode: report the match but restore the cursor.
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Rewrite Column/Identifier nodes into Dot/Var chains (used for dotted names)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Unwrap Column nodes whose name is one of the lambda's bound variables."""
        if isinstance(node, exp.Column):
            if node.name in lambda_variables:
                return node.this
        return node
55class Parser(metaclass=_Parser): 56 """ 57 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 58 a parsed syntax tree. 59 60 Args: 61 error_level: the desired error level. 62 Default: ErrorLevel.RAISE 63 error_message_context: determines the amount of context to capture from a 64 query string when displaying the error message (in number of characters). 65 Default: 50. 66 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 67 Default: 0 68 alias_post_tablesample: If the table alias comes after tablesample. 69 Default: False 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 null_ordering: Indicates the default null ordering method to use if not explicitly set. 74 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 75 Default: "nulls_are_small" 76 """ 77 78 FUNCTIONS: t.Dict[str, t.Callable] = { 79 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 80 "DATE_TO_DATE_STR": lambda args: exp.Cast( 81 this=seq_get(args, 0), 82 to=exp.DataType(this=exp.DataType.Type.TEXT), 83 ), 84 "TIME_TO_TIME_STR": lambda args: exp.Cast( 85 this=seq_get(args, 0), 86 to=exp.DataType(this=exp.DataType.Type.TEXT), 87 ), 88 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 89 this=exp.Cast( 90 this=seq_get(args, 0), 91 to=exp.DataType(this=exp.DataType.Type.TEXT), 92 ), 93 start=exp.Literal.number(1), 94 length=exp.Literal.number(10), 95 ), 96 "VAR_MAP": parse_var_map, 97 "IFNULL": exp.Coalesce.from_arg_list, 98 } 99 100 NO_PAREN_FUNCTIONS = { 101 TokenType.CURRENT_DATE: exp.CurrentDate, 102 TokenType.CURRENT_DATETIME: exp.CurrentDate, 103 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.STRUCT, 110 TokenType.NULLABLE, 111 } 112 113 TYPE_TOKENS = { 114 TokenType.BIT, 115 
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that can wrap a subquery, e.g. ANY(SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Objects that live in a database/schema namespace.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Everything that may follow CREATE / DROP / COMMENT ON.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Keyword tokens that are nevertheless allowed to act as identifiers.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.CURRENT_TIME,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.IF,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens valid as table aliases; join-related keywords are
    # excluded so e.g. "FROM t LEFT JOIN ..." is not read as an alias.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.LEFT,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may be followed by parentheses and parsed as function calls.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Operator-precedence tables: token -> expression class, one table per
    # binding level used by the expression-parsing ladder.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: "x -> expr" (Lambda) and "kwarg => value" (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._parse_conjunction().transform(
                self._replace_lambda, {node.name for node in expressions}
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that bind directly to a column, e.g. ::, ->, ->>, #>.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> parser entry point; used by parse_into.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement token -> parser for the whole statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal tokens -> primary expression builders.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Infix range/predicate operators, e.g. BETWEEN, IN, LIKE, IS.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser; keys are matched against the
    # upper-cased token text.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "VOLATILE": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    }

    # Functions whose arguments need bespoke parsing rather than a plain CSV list.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
        "EXTRACT": lambda self: self._parse_extract(),
        "POSITION": lambda self: self._parse_position(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
    }

    # Query-modifier arg name -> parser, applied after the main query body.
    QUERY_MODIFIER_PARSERS = {
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Dialect hooks for SHOW/SET statements; empty in the base parser
    # (tries are built from these by the _Parser metaclass).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    SET_PARSERS: t.Dict[str, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    INTEGER_DIVISION = True

    CONVERT_TYPE_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # See the class docstring for the meaning of each setting.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        # Clear all per-parse state so the instance can be reused.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # None of the candidate expression types parsed successfully.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core parse loop: split the token stream on semicolons into
        # per-statement chunks, then run parse_method over each chunk.
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = self._find_token(token)
        end = start + len(token.text)
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m underline the offending token in ANSI terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> exp.Expression:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach any comments collected from the previous token, consuming them.
        if self._prev_comments:
            instance.comments = self._prev_comments
            self._prev_comments = None
        if comments:
            instance.comments = comments
        self.validate_expression(instance)
        return instance

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL spanning `start` through the end of `end`.
        return self.sql[self._find_token(start) : self._find_token(end) + len(end.text)]

    def _find_token(self, token: Token) -> int:
        # Convert the token's (line, col) position into an absolute offset into
        # self.sql by walking the text and counting line breaks.
        line = 1
        col = 1
        index = 0

        while line < token.line or col < token.col:
            if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _advance(self, times: int = 1) -> None:
        # Move the cursor and refresh the _curr/_next/_prev token views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or fast-forward) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Expression:
        # Fallback: wrap an unparseable statement as an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON <kind> <object> IS <string>
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table()
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        # Dispatch on the leading token, falling back to commands, then to a
        # bare expression / SELECT.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self, default_kind: t.Optional[str] = None) -> t.Optional[exp.Expression]:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            if default_kind:
                kind = default_kind
            else:
                return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)
        volatile = self._match(TokenType.VOLATILE)

        # CREATE TABLE FUNCTION: skip TABLE so FUNCTION is the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
self._match(TokenType.SELECT, advance=False) 1102 or self._match(TokenType.WITH, advance=False) 1103 or self._match(TokenType.L_PAREN, advance=False) 1104 ): 1105 temp_properties = self._parse_properties() 1106 if properties and temp_properties: 1107 properties.expressions.extend(temp_properties.expressions) 1108 elif temp_properties: 1109 properties = temp_properties 1110 1111 expression = self._parse_ddl_select() 1112 1113 if create_token.token_type == TokenType.TABLE: 1114 # exp.Properties.Location.POST_EXPRESSION 1115 temp_properties = self._parse_properties() 1116 if properties and temp_properties: 1117 properties.expressions.extend(temp_properties.expressions) 1118 elif temp_properties: 1119 properties = temp_properties 1120 1121 indexes = [] 1122 while True: 1123 index = self._parse_create_table_index() 1124 1125 # exp.Properties.Location.POST_INDEX 1126 if self._match(TokenType.PARTITION_BY, advance=False): 1127 temp_properties = self._parse_properties() 1128 if properties and temp_properties: 1129 properties.expressions.extend(temp_properties.expressions) 1130 elif temp_properties: 1131 properties = temp_properties 1132 1133 if not index: 1134 break 1135 else: 1136 indexes.append(index) 1137 elif create_token.token_type == TokenType.VIEW: 1138 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1139 no_schema_binding = True 1140 1141 return self.expression( 1142 exp.Create, 1143 this=this, 1144 kind=create_token.text, 1145 replace=replace, 1146 unique=unique, 1147 volatile=volatile, 1148 expression=expression, 1149 exists=exists, 1150 properties=properties, 1151 indexes=indexes, 1152 no_schema_binding=no_schema_binding, 1153 begin=begin, 1154 ) 1155 1156 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1157 self._match(TokenType.COMMA) 1158 1159 # parsers look to _prev for no/dual/default, so need to consume first 1160 self._match_text_seq("NO") 1161 self._match_text_seq("DUAL") 1162 self._match_text_seq("DEFAULT") 1163 1164 if 
self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1165 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1166 1167 return None 1168 1169 def _parse_property(self) -> t.Optional[exp.Expression]: 1170 if self._match_texts(self.PROPERTY_PARSERS): 1171 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1172 1173 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1174 return self._parse_character_set(default=True) 1175 1176 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1177 return self._parse_sortkey(compound=True) 1178 1179 if self._match_text_seq("SQL", "SECURITY"): 1180 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1181 1182 assignment = self._match_pair( 1183 TokenType.VAR, TokenType.EQ, advance=False 1184 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1185 1186 if assignment: 1187 key = self._parse_var_or_string() 1188 self._match(TokenType.EQ) 1189 return self.expression(exp.Property, this=key, value=self._parse_column()) 1190 1191 return None 1192 1193 def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1194 self._match(TokenType.EQ) 1195 self._match(TokenType.ALIAS) 1196 return self.expression( 1197 exp_class, 1198 this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1199 ) 1200 1201 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1202 properties = [] 1203 1204 while True: 1205 if before: 1206 identified_property = self._parse_property_before() 1207 else: 1208 identified_property = self._parse_property() 1209 1210 if not identified_property: 1211 break 1212 for p in ensure_collection(identified_property): 1213 properties.append(p) 1214 1215 if properties: 1216 return self.expression(exp.Properties, expressions=properties) 1217 1218 return None 1219 1220 def _parse_fallback(self, no=False) -> exp.Expression: 1221 self._match_text_seq("FALLBACK") 1222 return 
    def _parse_fallback(self, no=False) -> exp.Expression:
        """Parse a Teradata-style FALLBACK [PROTECTION] property."""
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse a WITH-prefixed property: WITH (...), WITH JOURNAL TABLE,
        WITH [NO] DATA, or WITH ... ISOLATED LOADING."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop, prop, ...) — parenthesized property list.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse MySQL's DEFINER = user@host clause; None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # Host may be an identifier or a bare % (MOD token).
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        """Parse WITH JOURNAL TABLE = <table> (WITH JOURNAL already consumed)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        """Parse a [NO] LOG property; the leading NO is consumed by the caller."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        """Parse a [NO|DUAL] [BEFORE] JOURNAL property."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parse a [NOT LOCAL] AFTER JOURNAL property (no/dual/local flags come from the caller)."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parse CHECKSUM = ON | OFF | DEFAULT."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        # on stays None when neither ON nor OFF is present.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        """Parse FREESPACE = <number> [PERCENT]."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parse MERGEBLOCKRATIO [= <number> [PERCENT]]; no/default flags come from the caller."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse [DEFAULT|MIN[IMUM]|MAX[IMUM]] DATABLOCKSIZE [= <size> [BYTES|KBYTES|KILOBYTES]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
    def _parse_blockcompression(self) -> exp.Expression:
        """Parse BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING clause: object kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only these kinds name a specific object to lock; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr, ...>; returns [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse the value of a PARTITIONED BY property: a schema or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        """Parse the tail of WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        """Parse PRIMARY INDEX after a NO already consumed by the caller."""
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        """Parse COMMIT PRESERVE ROWS after an ON already consumed by the caller."""
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        """Parse DISTKEY (<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...] for CREATE TABLE ... LIKE."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # A dangling INCLUDING/EXCLUDING with no option name is malformed.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)
    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse SORTKEY (<id, ...>); `compound` marks a preceding COMPOUND keyword."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: RETURNS TABLE<...>, RETURNS TABLE (...), or RETURNS <type>."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> — angle-bracketed struct syntax.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_kwargs),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Parse a TEMPORARY property; tolerates being called right after GLOBAL."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        """Parse a DESCRIBE statement: optional creatable kind, then the target table."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement, including INSERT ... DIRECTORY and INSERT OR <alt>."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE / IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse a RETURNING <column, ...> clause, or None if absent."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parse ROW FORMAT ... when ROW was already consumed (FORMAT is matched here)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse Hive ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED ... clauses.

        With `match_row=True`, the leading ROW FORMAT pair must be present.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only valid directly after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_load_data(self) -> exp.Expression:
        """Parse Hive LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ..."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement (the DELETE token was consumed by the dispatcher)."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(schema=True),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement: target, SET assignments, FROM, WHERE, RETURNING."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse UNCACHE TABLE [IF EXISTS] <table>; TABLE is mandatory."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            # A single ('key' = 'value') pair is captured as [key, value].
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse PARTITION (<expr, ...>), or None if absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row: a parenthesized tuple, or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTE-prefixed statement, SELECT, parenthesized
        subquery (when `nested`/`table`), or VALUES; set operations are applied last."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            self._parse_query_modifiers(this)
            this = self._parse_set_operations(this)
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Tolerate a redundant WITH between CTEs as well as the usual comma.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(exp.With, expressions=expressions, recursive=recursive)

    def _parse_cte(self) -> exp.Expression:
        """Parse one CTE: <alias> AS (<statement>); the alias is mandatory."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] <alias> [(col, ...)], or None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = self._parse_id_var(
            any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        index = self._index

        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesis was not a column list — rewind past it.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap `this` in an exp.Subquery with optional pivots and alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(self, this: t.Optional[exp.Expression]) -> None:
        """Attach laterals, joins, and trailing clauses (WHERE, GROUP BY, ...) to `this` in place."""
        if not isinstance(this, self.MODIFIABLES):
            return

        table = isinstance(this, exp.Table)

        while True:
            lateral = self._parse_lateral()
            join = self._parse_join()
            # Comma joins extend the FROM list; not applicable when `this` is a table.
            comma = None if table else self._match(TokenType.COMMA)
            if lateral:
                this.append("laterals", lateral)
            if join:
                this.append("joins", join)
            if comma:
                this.args["from"].append("expressions", self._parse_table())
            if not (lateral or join or comma):
                break

        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            expression = parser(self)

            if expression:
                this.set(key, expression)
1837 hints = self._parse_csv(self._parse_function) 1838 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 1839 self.raise_error("Expected */ after HINT") 1840 return self.expression(exp.Hint, expressions=hints) 1841 1842 return None 1843 1844 def _parse_into(self) -> t.Optional[exp.Expression]: 1845 if not self._match(TokenType.INTO): 1846 return None 1847 1848 temp = self._match(TokenType.TEMPORARY) 1849 unlogged = self._match(TokenType.UNLOGGED) 1850 self._match(TokenType.TABLE) 1851 1852 return self.expression( 1853 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 1854 ) 1855 1856 def _parse_from(self) -> t.Optional[exp.Expression]: 1857 if not self._match(TokenType.FROM): 1858 return None 1859 1860 return self.expression( 1861 exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) 1862 ) 1863 1864 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 1865 if not self._match(TokenType.MATCH_RECOGNIZE): 1866 return None 1867 self._match_l_paren() 1868 1869 partition = self._parse_partition_by() 1870 order = self._parse_order() 1871 measures = ( 1872 self._parse_alias(self._parse_conjunction()) 1873 if self._match_text_seq("MEASURES") 1874 else None 1875 ) 1876 1877 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 1878 rows = exp.Var(this="ONE ROW PER MATCH") 1879 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 1880 text = "ALL ROWS PER MATCH" 1881 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 1882 text += f" SHOW EMPTY MATCHES" 1883 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 1884 text += f" OMIT EMPTY MATCHES" 1885 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 1886 text += f" WITH UNMATCHED ROWS" 1887 rows = exp.Var(this=text) 1888 else: 1889 rows = None 1890 1891 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 1892 text = "AFTER MATCH SKIP" 1893 if self._match_text_seq("PAST", "LAST", "ROW"): 1894 text += f" PAST LAST ROW" 1895 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 1896 text += f" TO NEXT ROW" 1897 elif self._match_text_seq("TO", "FIRST"): 1898 text += f" TO FIRST {self._advance_any().text}" # type: ignore 1899 elif self._match_text_seq("TO", "LAST"): 1900 text += f" TO LAST {self._advance_any().text}" # type: ignore 1901 after = exp.Var(this=text) 1902 else: 1903 after = None 1904 1905 if self._match_text_seq("PATTERN"): 1906 self._match_l_paren() 1907 1908 if not self._curr: 1909 self.raise_error("Expecting )", self._curr) 1910 1911 paren = 1 1912 start = self._curr 1913 1914 while self._curr and paren > 0: 1915 if self._curr.token_type == TokenType.L_PAREN: 1916 paren += 1 1917 if self._curr.token_type == TokenType.R_PAREN: 1918 paren -= 1 1919 end = self._prev 1920 self._advance() 1921 if paren > 0: 1922 self.raise_error("Expecting )", self._curr) 1923 pattern = exp.Var(this=self._find_sql(start, end)) 1924 else: 1925 pattern = None 1926 1927 define = ( 1928 self._parse_alias(self._parse_conjunction()) if self._match_text_seq("DEFINE") else None 1929 ) 1930 self._match_r_paren() 1931 1932 return self.expression( 1933 exp.MatchRecognize, 1934 partition_by=partition, 1935 order=order, 1936 measures=measures, 1937 rows=rows, 1938 after=after, 1939 pattern=pattern, 1940 define=define, 1941 ) 1942 1943 def _parse_lateral(self) -> t.Optional[exp.Expression]: 1944 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 1945 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 1946 1947 if outer_apply or cross_apply: 1948 this = self._parse_select(table=True) 1949 view = None 1950 outer = not cross_apply 1951 elif self._match(TokenType.LATERAL): 1952 this = self._parse_select(table=True) 1953 view = self._match(TokenType.VIEW) 1954 outer = self._match(TokenType.OUTER) 1955 else: 1956 return None 1957 1958 if not this: 1959 this = self._parse_function() or self._parse_id_var(any_token=False) 1960 while self._match(TokenType.DOT): 1961 this = exp.Dot( 1962 this=this, 
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL [VIEW] ... or OUTER/CROSS APPLY ...; APPLY forms become joins."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW aliasing: <table> [AS col, col, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        # APPLY is represented as a join: CROSS APPLY has no side, OUTER APPLY is LEFT.
        if outer_apply or cross_apply:
            return self.expression(exp.Join, this=expression, side=None if cross_apply else "LEFT")

        return expression

    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume optional NATURAL, join side (LEFT/RIGHT/...), and join kind tokens."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause with optional ON condition or USING column list."""
        natural, side, kind = self._parse_join_side_and_kind()

        if not skip_join_token and not self._match(TokenType.JOIN):
            return None

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse the tail of CREATE INDEX: <name> ON [TABLE] <table> <columns>."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )

    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an inline index in CREATE TABLE: [UNIQUE] [PRIMARY] [AMP] INDEX ..."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a dotted table reference: [catalog.][db.]table[.more ...]."""
        catalog = None
        db = None
        table = (not schema and self._parse_function()) or self._parse_id_var(any_token=False)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(exp.Dot, this=table, expression=self._parse_id_var())
            else:
                catalog = db
                db = table
                table = self._parse_id_var()

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
self._parse_table_parts(schema=schema) 2097 2098 if schema: 2099 return self._parse_schema(this=this) 2100 2101 if self.alias_post_tablesample: 2102 table_sample = self._parse_table_sample() 2103 2104 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2105 2106 if alias: 2107 this.set("alias", alias) 2108 2109 if not this.args.get("pivots"): 2110 this.set("pivots", self._parse_pivots()) 2111 2112 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2113 this.set( 2114 "hints", 2115 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2116 ) 2117 self._match_r_paren() 2118 2119 if not self.alias_post_tablesample: 2120 table_sample = self._parse_table_sample() 2121 2122 if table_sample: 2123 table_sample.set("this", this) 2124 this = table_sample 2125 2126 return this 2127 2128 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2129 if not self._match(TokenType.UNNEST): 2130 return None 2131 2132 expressions = self._parse_wrapped_csv(self._parse_column) 2133 ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)) 2134 alias = self._parse_table_alias() 2135 2136 if alias and self.unnest_column_only: 2137 if alias.args.get("columns"): 2138 self.raise_error("Unexpected extra column alias in unnest.") 2139 alias.set("columns", [alias.this]) 2140 alias.set("this", None) 2141 2142 offset = None 2143 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2144 self._match(TokenType.ALIAS) 2145 offset = self._parse_conjunction() 2146 2147 return self.expression( 2148 exp.Unnest, 2149 expressions=expressions, 2150 ordinality=ordinality, 2151 alias=alias, 2152 offset=offset, 2153 ) 2154 2155 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2156 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2157 if not is_derived and not self._match(TokenType.VALUES): 2158 return None 2159 2160 expressions = self._parse_csv(self._parse_value) 2161 2162 if 
is_derived: 2163 self._match_r_paren() 2164 2165 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2166 2167 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2168 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2169 as_modifier and self._match_text_seq("USING", "SAMPLE") 2170 ): 2171 return None 2172 2173 bucket_numerator = None 2174 bucket_denominator = None 2175 bucket_field = None 2176 percent = None 2177 rows = None 2178 size = None 2179 seed = None 2180 2181 kind = "TABLESAMPLE" if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2182 method = self._parse_var(tokens=(TokenType.ROW,)) 2183 2184 self._match(TokenType.L_PAREN) 2185 2186 num = self._parse_number() 2187 2188 if self._match(TokenType.BUCKET): 2189 bucket_numerator = self._parse_number() 2190 self._match(TokenType.OUT_OF) 2191 bucket_denominator = bucket_denominator = self._parse_number() 2192 self._match(TokenType.ON) 2193 bucket_field = self._parse_field() 2194 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2195 percent = num 2196 elif self._match(TokenType.ROWS): 2197 rows = num 2198 else: 2199 size = num 2200 2201 self._match(TokenType.R_PAREN) 2202 2203 if self._match(TokenType.L_PAREN): 2204 method = self._parse_var() 2205 seed = self._match(TokenType.COMMA) and self._parse_number() 2206 self._match_r_paren() 2207 elif self._match_texts(("SEED", "REPEATABLE")): 2208 seed = self._parse_wrapped(self._parse_number) 2209 2210 return self.expression( 2211 exp.TableSample, 2212 method=method, 2213 bucket_numerator=bucket_numerator, 2214 bucket_denominator=bucket_denominator, 2215 bucket_field=bucket_field, 2216 percent=percent, 2217 rows=rows, 2218 size=size, 2219 seed=seed, 2220 kind=kind, 2221 ) 2222 2223 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2224 return list(iter(self._parse_pivot, None)) 2225 2226 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2227 
index = self._index 2228 2229 if self._match(TokenType.PIVOT): 2230 unpivot = False 2231 elif self._match(TokenType.UNPIVOT): 2232 unpivot = True 2233 else: 2234 return None 2235 2236 expressions = [] 2237 field = None 2238 2239 if not self._match(TokenType.L_PAREN): 2240 self._retreat(index) 2241 return None 2242 2243 if unpivot: 2244 expressions = self._parse_csv(self._parse_column) 2245 else: 2246 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2247 2248 if not self._match(TokenType.FOR): 2249 self.raise_error("Expecting FOR") 2250 2251 value = self._parse_column() 2252 2253 if not self._match(TokenType.IN): 2254 self.raise_error("Expecting IN") 2255 2256 field = self._parse_in(value) 2257 2258 self._match_r_paren() 2259 2260 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2261 2262 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2263 pivot.set("alias", self._parse_table_alias()) 2264 2265 return pivot 2266 2267 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2268 if not skip_where_token and not self._match(TokenType.WHERE): 2269 return None 2270 2271 return self.expression( 2272 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2273 ) 2274 2275 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2276 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2277 return None 2278 2279 elements = defaultdict(list) 2280 2281 while True: 2282 expressions = self._parse_csv(self._parse_conjunction) 2283 if expressions: 2284 elements["expressions"].extend(expressions) 2285 2286 grouping_sets = self._parse_grouping_sets() 2287 if grouping_sets: 2288 elements["grouping_sets"].extend(grouping_sets) 2289 2290 rollup = None 2291 cube = None 2292 2293 with_ = self._match(TokenType.WITH) 2294 if self._match(TokenType.ROLLUP): 2295 rollup = with_ or 
self._parse_wrapped_csv(self._parse_column) 2296 elements["rollup"].extend(ensure_list(rollup)) 2297 2298 if self._match(TokenType.CUBE): 2299 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2300 elements["cube"].extend(ensure_list(cube)) 2301 2302 if not (expressions or grouping_sets or rollup or cube): 2303 break 2304 2305 return self.expression(exp.Group, **elements) # type: ignore 2306 2307 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2308 if not self._match(TokenType.GROUPING_SETS): 2309 return None 2310 2311 return self._parse_wrapped_csv(self._parse_grouping_set) 2312 2313 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2314 if self._match(TokenType.L_PAREN): 2315 grouping_set = self._parse_csv(self._parse_column) 2316 self._match_r_paren() 2317 return self.expression(exp.Tuple, expressions=grouping_set) 2318 2319 return self._parse_column() 2320 2321 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2322 if not skip_having_token and not self._match(TokenType.HAVING): 2323 return None 2324 return self.expression(exp.Having, this=self._parse_conjunction()) 2325 2326 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2327 if not self._match(TokenType.QUALIFY): 2328 return None 2329 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2330 2331 def _parse_order( 2332 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2333 ) -> t.Optional[exp.Expression]: 2334 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2335 return this 2336 2337 return self.expression( 2338 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2339 ) 2340 2341 def _parse_sort( 2342 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2343 ) -> t.Optional[exp.Expression]: 2344 if not self._match(token_type): 2345 return None 2346 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 2347 2348 def _parse_ordered(self) -> exp.Expression: 2349 this = self._parse_conjunction() 2350 self._match(TokenType.ASC) 2351 is_desc = self._match(TokenType.DESC) 2352 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2353 is_nulls_last = self._match(TokenType.NULLS_LAST) 2354 desc = is_desc or False 2355 asc = not desc 2356 nulls_first = is_nulls_first or False 2357 explicitly_null_ordered = is_nulls_first or is_nulls_last 2358 if ( 2359 not explicitly_null_ordered 2360 and ( 2361 (asc and self.null_ordering == "nulls_are_small") 2362 or (desc and self.null_ordering != "nulls_are_small") 2363 ) 2364 and self.null_ordering != "nulls_are_last" 2365 ): 2366 nulls_first = True 2367 2368 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2369 2370 def _parse_limit( 2371 self, this: t.Optional[exp.Expression] = None, top: bool = False 2372 ) -> t.Optional[exp.Expression]: 2373 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2374 limit_paren = self._match(TokenType.L_PAREN) 2375 limit_exp = self.expression( 2376 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2377 ) 2378 2379 if limit_paren: 2380 self._match_r_paren() 2381 2382 return limit_exp 2383 2384 if self._match(TokenType.FETCH): 2385 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2386 direction = self._prev.text if direction else "FIRST" 2387 count = self._parse_number() 2388 self._match_set((TokenType.ROW, TokenType.ROWS)) 2389 self._match(TokenType.ONLY) 2390 return self.expression(exp.Fetch, direction=direction, count=count) 2391 2392 return this 2393 2394 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2395 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2396 return this 2397 2398 count = self._parse_number() 2399 self._match_set((TokenType.ROW, TokenType.ROWS)) 2400 return 
self.expression(exp.Offset, this=this, expression=count) 2401 2402 def _parse_lock(self) -> t.Optional[exp.Expression]: 2403 if self._match_text_seq("FOR", "UPDATE"): 2404 return self.expression(exp.Lock, update=True) 2405 if self._match_text_seq("FOR", "SHARE"): 2406 return self.expression(exp.Lock, update=False) 2407 2408 return None 2409 2410 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2411 if not self._match_set(self.SET_OPERATIONS): 2412 return this 2413 2414 token_type = self._prev.token_type 2415 2416 if token_type == TokenType.UNION: 2417 expression = exp.Union 2418 elif token_type == TokenType.EXCEPT: 2419 expression = exp.Except 2420 else: 2421 expression = exp.Intersect 2422 2423 return self.expression( 2424 expression, 2425 this=this, 2426 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2427 expression=self._parse_set_operations(self._parse_select(nested=True)), 2428 ) 2429 2430 def _parse_expression(self) -> t.Optional[exp.Expression]: 2431 return self._parse_alias(self._parse_conjunction()) 2432 2433 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2434 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2435 2436 def _parse_equality(self) -> t.Optional[exp.Expression]: 2437 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2438 2439 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2440 return self._parse_tokens(self._parse_range, self.COMPARISON) 2441 2442 def _parse_range(self) -> t.Optional[exp.Expression]: 2443 this = self._parse_bitwise() 2444 negate = self._match(TokenType.NOT) 2445 2446 if self._match_set(self.RANGE_PARSERS): 2447 this = self.RANGE_PARSERS[self._prev.token_type](self, this) 2448 elif self._match(TokenType.ISNULL): 2449 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2450 2451 # Postgres supports ISNULL and NOTNULL for conditions. 
2452 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2453 if self._match(TokenType.NOTNULL): 2454 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2455 this = self.expression(exp.Not, this=this) 2456 2457 if negate: 2458 this = self.expression(exp.Not, this=this) 2459 2460 if self._match(TokenType.IS): 2461 this = self._parse_is(this) 2462 2463 return this 2464 2465 def _parse_is(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2466 negate = self._match(TokenType.NOT) 2467 if self._match(TokenType.DISTINCT_FROM): 2468 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2469 return self.expression(klass, this=this, expression=self._parse_expression()) 2470 2471 this = self.expression( 2472 exp.Is, 2473 this=this, 2474 expression=self._parse_null() or self._parse_boolean(), 2475 ) 2476 return self.expression(exp.Not, this=this) if negate else this 2477 2478 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2479 unnest = self._parse_unnest() 2480 if unnest: 2481 this = self.expression(exp.In, this=this, unnest=unnest) 2482 elif self._match(TokenType.L_PAREN): 2483 expressions = self._parse_csv(self._parse_select_or_expression) 2484 2485 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2486 this = self.expression(exp.In, this=this, query=expressions[0]) 2487 else: 2488 this = self.expression(exp.In, this=this, expressions=expressions) 2489 2490 self._match_r_paren() 2491 else: 2492 this = self.expression(exp.In, this=this, field=self._parse_field()) 2493 2494 return this 2495 2496 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2497 low = self._parse_bitwise() 2498 self._match(TokenType.AND) 2499 high = self._parse_bitwise() 2500 return self.expression(exp.Between, this=this, low=low, high=high) 2501 2502 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2503 if not self._match(TokenType.ESCAPE): 2504 return this 2505 
return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2506 2507 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2508 this = self._parse_term() 2509 2510 while True: 2511 if self._match_set(self.BITWISE): 2512 this = self.expression( 2513 self.BITWISE[self._prev.token_type], 2514 this=this, 2515 expression=self._parse_term(), 2516 ) 2517 elif self._match_pair(TokenType.LT, TokenType.LT): 2518 this = self.expression( 2519 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2520 ) 2521 elif self._match_pair(TokenType.GT, TokenType.GT): 2522 this = self.expression( 2523 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2524 ) 2525 else: 2526 break 2527 2528 return this 2529 2530 def _parse_term(self) -> t.Optional[exp.Expression]: 2531 return self._parse_tokens(self._parse_factor, self.TERM) 2532 2533 def _parse_factor(self) -> t.Optional[exp.Expression]: 2534 return self._parse_tokens(self._parse_unary, self.FACTOR) 2535 2536 def _parse_unary(self) -> t.Optional[exp.Expression]: 2537 if self._match_set(self.UNARY_PARSERS): 2538 return self.UNARY_PARSERS[self._prev.token_type](self) 2539 return self._parse_at_time_zone(self._parse_type()) 2540 2541 def _parse_type(self) -> t.Optional[exp.Expression]: 2542 if self._match(TokenType.INTERVAL): 2543 return self.expression(exp.Interval, this=self._parse_term(), unit=self._parse_field()) 2544 2545 index = self._index 2546 type_token = self._parse_types(check_func=True) 2547 this = self._parse_column() 2548 2549 if type_token: 2550 if isinstance(this, exp.Literal): 2551 return self.expression(exp.Cast, this=this, to=type_token) 2552 if not type_token.args.get("expressions"): 2553 self._retreat(index) 2554 return self._parse_column() 2555 return type_token 2556 2557 return this 2558 2559 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2560 index = self._index 2561 2562 prefix = self._match_text_seq("SYSUDTLIB", ".") 2563 2564 if not 
self._match_set(self.TYPE_TOKENS): 2565 return None 2566 2567 type_token = self._prev.token_type 2568 2569 if type_token == TokenType.PSEUDO_TYPE: 2570 return self.expression(exp.PseudoType, this=self._prev.text) 2571 2572 nested = type_token in self.NESTED_TYPE_TOKENS 2573 is_struct = type_token == TokenType.STRUCT 2574 expressions = None 2575 maybe_func = False 2576 2577 if self._match(TokenType.L_PAREN): 2578 if is_struct: 2579 expressions = self._parse_csv(self._parse_struct_kwargs) 2580 elif nested: 2581 expressions = self._parse_csv(self._parse_types) 2582 else: 2583 expressions = self._parse_csv(self._parse_conjunction) 2584 2585 if not expressions: 2586 self._retreat(index) 2587 return None 2588 2589 self._match_r_paren() 2590 maybe_func = True 2591 2592 if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2593 this = exp.DataType( 2594 this=exp.DataType.Type.ARRAY, 2595 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2596 nested=True, 2597 ) 2598 2599 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2600 this = exp.DataType( 2601 this=exp.DataType.Type.ARRAY, 2602 expressions=[this], 2603 nested=True, 2604 ) 2605 2606 return this 2607 2608 if self._match(TokenType.L_BRACKET): 2609 self._retreat(index) 2610 return None 2611 2612 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2613 if nested and self._match(TokenType.LT): 2614 if is_struct: 2615 expressions = self._parse_csv(self._parse_struct_kwargs) 2616 else: 2617 expressions = self._parse_csv(self._parse_types) 2618 2619 if not self._match(TokenType.GT): 2620 self.raise_error("Expecting >") 2621 2622 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2623 values = self._parse_csv(self._parse_conjunction) 2624 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2625 2626 value: t.Optional[exp.Expression] = None 2627 if type_token in self.TIMESTAMPS: 2628 if self._match(TokenType.WITH_TIME_ZONE) or 
type_token == TokenType.TIMESTAMPTZ: 2629 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2630 elif ( 2631 self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ 2632 ): 2633 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2634 elif self._match(TokenType.WITHOUT_TIME_ZONE): 2635 if type_token == TokenType.TIME: 2636 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2637 else: 2638 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2639 2640 maybe_func = maybe_func and value is None 2641 2642 if value is None: 2643 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2644 elif type_token == TokenType.INTERVAL: 2645 unit = self._parse_var() 2646 2647 if not unit: 2648 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 2649 else: 2650 value = self.expression(exp.Interval, unit=unit) 2651 2652 if maybe_func and check_func: 2653 index2 = self._index 2654 peek = self._parse_string() 2655 2656 if not peek: 2657 self._retreat(index) 2658 return None 2659 2660 self._retreat(index2) 2661 2662 if value: 2663 return value 2664 2665 return exp.DataType( 2666 this=exp.DataType.Type[type_token.value.upper()], 2667 expressions=expressions, 2668 nested=nested, 2669 values=values, 2670 prefix=prefix, 2671 ) 2672 2673 def _parse_struct_kwargs(self) -> t.Optional[exp.Expression]: 2674 if self._curr and self._curr.token_type in self.TYPE_TOKENS: 2675 return self._parse_types() 2676 2677 this = self._parse_id_var() 2678 self._match(TokenType.COLON) 2679 data_type = self._parse_types() 2680 2681 if not data_type: 2682 return None 2683 return self.expression(exp.StructKwarg, this=this, expression=data_type) 2684 2685 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2686 if not self._match(TokenType.AT_TIME_ZONE): 2687 return this 2688 return 
self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 2689 2690 def _parse_column(self) -> t.Optional[exp.Expression]: 2691 this = self._parse_field() 2692 if isinstance(this, exp.Identifier): 2693 this = self.expression(exp.Column, this=this) 2694 elif not this: 2695 return self._parse_bracket(this) 2696 this = self._parse_bracket(this) 2697 2698 while self._match_set(self.COLUMN_OPERATORS): 2699 op_token = self._prev.token_type 2700 op = self.COLUMN_OPERATORS.get(op_token) 2701 2702 if op_token == TokenType.DCOLON: 2703 field = self._parse_types() 2704 if not field: 2705 self.raise_error("Expected type") 2706 elif op: 2707 self._advance() 2708 value = self._prev.text 2709 field = ( 2710 exp.Literal.number(value) 2711 if self._prev.token_type == TokenType.NUMBER 2712 else exp.Literal.string(value) 2713 ) 2714 else: 2715 field = self._parse_star() or self._parse_function() or self._parse_id_var() 2716 2717 if isinstance(field, exp.Func): 2718 # bigquery allows function calls like x.y.count(...) 2719 # SAFE.SUBSTR(...) 
2720 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 2721 this = self._replace_columns_with_dots(this) 2722 2723 if op: 2724 this = op(self, this, field) 2725 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 2726 this = self.expression( 2727 exp.Column, 2728 this=field, 2729 table=this.this, 2730 db=this.args.get("table"), 2731 catalog=this.args.get("db"), 2732 ) 2733 else: 2734 this = self.expression(exp.Dot, this=this, expression=field) 2735 this = self._parse_bracket(this) 2736 2737 return this 2738 2739 def _parse_primary(self) -> t.Optional[exp.Expression]: 2740 if self._match_set(self.PRIMARY_PARSERS): 2741 token_type = self._prev.token_type 2742 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 2743 2744 if token_type == TokenType.STRING: 2745 expressions = [primary] 2746 while self._match(TokenType.STRING): 2747 expressions.append(exp.Literal.string(self._prev.text)) 2748 if len(expressions) > 1: 2749 return self.expression(exp.Concat, expressions=expressions) 2750 return primary 2751 2752 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 2753 return exp.Literal.number(f"0.{self._prev.text}") 2754 2755 if self._match(TokenType.L_PAREN): 2756 comments = self._prev_comments 2757 query = self._parse_select() 2758 2759 if query: 2760 expressions = [query] 2761 else: 2762 expressions = self._parse_csv( 2763 lambda: self._parse_alias(self._parse_conjunction(), explicit=True) 2764 ) 2765 2766 this = seq_get(expressions, 0) 2767 self._parse_query_modifiers(this) 2768 self._match_r_paren() 2769 2770 if isinstance(this, exp.Subqueryable): 2771 this = self._parse_set_operations( 2772 self._parse_subquery(this=this, parse_alias=False) 2773 ) 2774 elif len(expressions) > 1: 2775 this = self.expression(exp.Tuple, expressions=expressions) 2776 else: 2777 this = self.expression(exp.Paren, this=this) 2778 2779 if this and comments: 2780 this.comments = comments 2781 2782 return this 
2783 2784 return None 2785 2786 def _parse_field(self, any_token: bool = False) -> t.Optional[exp.Expression]: 2787 return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token) 2788 2789 def _parse_function( 2790 self, functions: t.Optional[t.Dict[str, t.Callable]] = None 2791 ) -> t.Optional[exp.Expression]: 2792 if not self._curr: 2793 return None 2794 2795 token_type = self._curr.token_type 2796 2797 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 2798 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 2799 2800 if not self._next or self._next.token_type != TokenType.L_PAREN: 2801 if token_type in self.NO_PAREN_FUNCTIONS: 2802 self._advance() 2803 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 2804 2805 return None 2806 2807 if token_type not in self.FUNC_TOKENS: 2808 return None 2809 2810 this = self._curr.text 2811 upper = this.upper() 2812 self._advance(2) 2813 2814 parser = self.FUNCTION_PARSERS.get(upper) 2815 2816 if parser: 2817 this = parser(self) 2818 else: 2819 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 2820 2821 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 2822 this = self.expression(subquery_predicate, this=self._parse_select()) 2823 self._match_r_paren() 2824 return this 2825 2826 if functions is None: 2827 functions = self.FUNCTIONS 2828 2829 function = functions.get(upper) 2830 args = self._parse_csv(self._parse_lambda) 2831 2832 if function: 2833 # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the 2834 # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. 
2835 if count_params(function) == 2: 2836 params = None 2837 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 2838 params = self._parse_csv(self._parse_lambda) 2839 2840 this = function(args, params) 2841 else: 2842 this = function(args) 2843 2844 self.validate_expression(this, args) 2845 else: 2846 this = self.expression(exp.Anonymous, this=this, expressions=args) 2847 2848 self._match_r_paren(this) 2849 return self._parse_window(this) 2850 2851 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 2852 return self._parse_column_def(self._parse_id_var()) 2853 2854 def _parse_user_defined_function( 2855 self, kind: t.Optional[TokenType] = None 2856 ) -> t.Optional[exp.Expression]: 2857 this = self._parse_id_var() 2858 2859 while self._match(TokenType.DOT): 2860 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 2861 2862 if not self._match(TokenType.L_PAREN): 2863 return this 2864 2865 expressions = self._parse_csv(self._parse_function_parameter) 2866 self._match_r_paren() 2867 return self.expression( 2868 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 2869 ) 2870 2871 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 2872 literal = self._parse_primary() 2873 if literal: 2874 return self.expression(exp.Introducer, this=token.text, expression=literal) 2875 2876 return self.expression(exp.Identifier, this=token.text) 2877 2878 def _parse_national(self, token: Token) -> exp.Expression: 2879 return self.expression(exp.National, this=exp.Literal.string(token.text)) 2880 2881 def _parse_session_parameter(self) -> exp.Expression: 2882 kind = None 2883 this = self._parse_id_var() or self._parse_primary() 2884 2885 if this and self._match(TokenType.DOT): 2886 kind = this.name 2887 this = self._parse_var() or self._parse_primary() 2888 2889 return self.expression(exp.SessionParameter, this=this, kind=kind) 2890 2891 def _parse_lambda(self) -> t.Optional[exp.Expression]: 2892 
index = self._index 2893 2894 if self._match(TokenType.L_PAREN): 2895 expressions = self._parse_csv(self._parse_id_var) 2896 2897 if not self._match(TokenType.R_PAREN): 2898 self._retreat(index) 2899 else: 2900 expressions = [self._parse_id_var()] 2901 2902 if self._match_set(self.LAMBDAS): 2903 return self.LAMBDAS[self._prev.token_type](self, expressions) 2904 2905 self._retreat(index) 2906 2907 this: t.Optional[exp.Expression] 2908 2909 if self._match(TokenType.DISTINCT): 2910 this = self.expression( 2911 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 2912 ) 2913 else: 2914 this = self._parse_select_or_expression() 2915 2916 if self._match(TokenType.IGNORE_NULLS): 2917 this = self.expression(exp.IgnoreNulls, this=this) 2918 else: 2919 self._match(TokenType.RESPECT_NULLS) 2920 2921 return self._parse_limit(self._parse_order(this)) 2922 2923 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2924 index = self._index 2925 if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT): 2926 self._retreat(index) 2927 return this 2928 2929 args = self._parse_csv( 2930 lambda: self._parse_constraint() 2931 or self._parse_column_def(self._parse_field(any_token=True)) 2932 ) 2933 self._match_r_paren() 2934 return self.expression(exp.Schema, this=this, expressions=args) 2935 2936 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2937 kind = self._parse_types() 2938 2939 if self._match_text_seq("FOR", "ORDINALITY"): 2940 return self.expression(exp.ColumnDef, this=this, ordinality=True) 2941 2942 constraints = [] 2943 while True: 2944 constraint = self._parse_column_constraint() 2945 if not constraint: 2946 break 2947 constraints.append(constraint) 2948 2949 if not kind and not constraints: 2950 return this 2951 2952 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 2953 2954 def _parse_auto_increment(self) -> exp.Expression: 
2955 start = None 2956 increment = None 2957 2958 if self._match(TokenType.L_PAREN, advance=False): 2959 args = self._parse_wrapped_csv(self._parse_bitwise) 2960 start = seq_get(args, 0) 2961 increment = seq_get(args, 1) 2962 elif self._match_text_seq("START"): 2963 start = self._parse_bitwise() 2964 self._match_text_seq("INCREMENT") 2965 increment = self._parse_bitwise() 2966 2967 if start and increment: 2968 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 2969 2970 return exp.AutoIncrementColumnConstraint() 2971 2972 def _parse_compress(self) -> exp.Expression: 2973 if self._match(TokenType.L_PAREN, advance=False): 2974 return self.expression( 2975 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 2976 ) 2977 2978 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 2979 2980 def _parse_generated_as_identity(self) -> exp.Expression: 2981 if self._match(TokenType.BY_DEFAULT): 2982 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False) 2983 else: 2984 self._match_text_seq("ALWAYS") 2985 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 2986 2987 self._match_text_seq("AS", "IDENTITY") 2988 if self._match(TokenType.L_PAREN): 2989 if self._match_text_seq("START", "WITH"): 2990 this.set("start", self._parse_bitwise()) 2991 if self._match_text_seq("INCREMENT", "BY"): 2992 this.set("increment", self._parse_bitwise()) 2993 if self._match_text_seq("MINVALUE"): 2994 this.set("minvalue", self._parse_bitwise()) 2995 if self._match_text_seq("MAXVALUE"): 2996 this.set("maxvalue", self._parse_bitwise()) 2997 2998 if self._match_text_seq("CYCLE"): 2999 this.set("cycle", True) 3000 elif self._match_text_seq("NO", "CYCLE"): 3001 this.set("cycle", False) 3002 3003 self._match_r_paren() 3004 3005 return this 3006 3007 def _parse_inline(self) -> t.Optional[exp.Expression]: 3008 self._match_text_seq("LENGTH") 3009 return 
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3010 3011 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 3012 if self._match_text_seq("NULL"): 3013 return self.expression(exp.NotNullColumnConstraint) 3014 if self._match_text_seq("CASESPECIFIC"): 3015 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3016 return None 3017 3018 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3019 this = self._parse_references() 3020 if this: 3021 return this 3022 3023 if self._match(TokenType.CONSTRAINT): 3024 this = self._parse_id_var() 3025 3026 if self._match_texts(self.CONSTRAINT_PARSERS): 3027 return self.expression( 3028 exp.ColumnConstraint, 3029 this=this, 3030 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3031 ) 3032 3033 return this 3034 3035 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3036 if not self._match(TokenType.CONSTRAINT): 3037 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3038 3039 this = self._parse_id_var() 3040 expressions = [] 3041 3042 while True: 3043 constraint = self._parse_unnamed_constraint() or self._parse_function() 3044 if not constraint: 3045 break 3046 expressions.append(constraint) 3047 3048 return self.expression(exp.Constraint, this=this, expressions=expressions) 3049 3050 def _parse_unnamed_constraint( 3051 self, constraints: t.Optional[t.Collection[str]] = None 3052 ) -> t.Optional[exp.Expression]: 3053 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3054 return None 3055 3056 constraint = self._prev.text.upper() 3057 if constraint not in self.CONSTRAINT_PARSERS: 3058 self.raise_error(f"No parser found for schema constraint {constraint}.") 3059 3060 return self.CONSTRAINT_PARSERS[constraint](self) 3061 3062 def _parse_unique(self) -> exp.Expression: 3063 if not self._match(TokenType.L_PAREN, advance=False): 3064 return self.expression(exp.UniqueColumnConstraint) 3065 return 
self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars()) 3066 3067 def _parse_key_constraint_options(self) -> t.List[str]: 3068 options = [] 3069 while True: 3070 if not self._curr: 3071 break 3072 3073 if self._match(TokenType.ON): 3074 action = None 3075 on = self._advance_any() and self._prev.text 3076 3077 if self._match(TokenType.NO_ACTION): 3078 action = "NO ACTION" 3079 elif self._match(TokenType.CASCADE): 3080 action = "CASCADE" 3081 elif self._match_pair(TokenType.SET, TokenType.NULL): 3082 action = "SET NULL" 3083 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3084 action = "SET DEFAULT" 3085 else: 3086 self.raise_error("Invalid key constraint") 3087 3088 options.append(f"ON {on} {action}") 3089 elif self._match_text_seq("NOT", "ENFORCED"): 3090 options.append("NOT ENFORCED") 3091 elif self._match_text_seq("DEFERRABLE"): 3092 options.append("DEFERRABLE") 3093 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3094 options.append("INITIALLY DEFERRED") 3095 elif self._match_text_seq("NORELY"): 3096 options.append("NORELY") 3097 elif self._match_text_seq("MATCH", "FULL"): 3098 options.append("MATCH FULL") 3099 else: 3100 break 3101 3102 return options 3103 3104 def _parse_references(self) -> t.Optional[exp.Expression]: 3105 if not self._match(TokenType.REFERENCES): 3106 return None 3107 3108 expressions = None 3109 this = self._parse_id_var() 3110 3111 if self._match(TokenType.L_PAREN, advance=False): 3112 expressions = self._parse_wrapped_id_vars() 3113 3114 options = self._parse_key_constraint_options() 3115 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3116 3117 def _parse_foreign_key(self) -> exp.Expression: 3118 expressions = self._parse_wrapped_id_vars() 3119 reference = self._parse_references() 3120 options = {} 3121 3122 while self._match(TokenType.ON): 3123 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3124 self.raise_error("Expected DELETE or UPDATE") 3125 3126 
kind = self._prev.text.lower() 3127 3128 if self._match(TokenType.NO_ACTION): 3129 action = "NO ACTION" 3130 elif self._match(TokenType.SET): 3131 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3132 action = "SET " + self._prev.text.upper() 3133 else: 3134 self._advance() 3135 action = self._prev.text.upper() 3136 3137 options[kind] = action 3138 3139 return self.expression( 3140 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3141 ) 3142 3143 def _parse_primary_key(self) -> exp.Expression: 3144 desc = ( 3145 self._match_set((TokenType.ASC, TokenType.DESC)) 3146 and self._prev.token_type == TokenType.DESC 3147 ) 3148 3149 if not self._match(TokenType.L_PAREN, advance=False): 3150 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3151 3152 expressions = self._parse_wrapped_id_vars() 3153 options = self._parse_key_constraint_options() 3154 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3155 3156 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3157 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3158 return this 3159 3160 bracket_kind = self._prev.token_type 3161 expressions: t.List[t.Optional[exp.Expression]] 3162 3163 if self._match(TokenType.COLON): 3164 expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())] 3165 else: 3166 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3167 3168 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3169 if bracket_kind == TokenType.L_BRACE: 3170 this = self.expression(exp.Struct, expressions=expressions) 3171 elif not this or this.name.upper() == "ARRAY": 3172 this = self.expression(exp.Array, expressions=expressions) 3173 else: 3174 expressions = apply_index_offset(expressions, -self.index_offset) 3175 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3176 3177 if not 
self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3178 self.raise_error("Expected ]") 3179 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3180 self.raise_error("Expected }") 3181 3182 this.comments = self._prev_comments 3183 return self._parse_bracket(this) 3184 3185 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3186 if self._match(TokenType.COLON): 3187 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3188 return this 3189 3190 def _parse_case(self) -> t.Optional[exp.Expression]: 3191 ifs = [] 3192 default = None 3193 3194 expression = self._parse_conjunction() 3195 3196 while self._match(TokenType.WHEN): 3197 this = self._parse_conjunction() 3198 self._match(TokenType.THEN) 3199 then = self._parse_conjunction() 3200 ifs.append(self.expression(exp.If, this=this, true=then)) 3201 3202 if self._match(TokenType.ELSE): 3203 default = self._parse_conjunction() 3204 3205 if not self._match(TokenType.END): 3206 self.raise_error("Expected END after CASE", self._prev) 3207 3208 return self._parse_window( 3209 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3210 ) 3211 3212 def _parse_if(self) -> t.Optional[exp.Expression]: 3213 if self._match(TokenType.L_PAREN): 3214 args = self._parse_csv(self._parse_conjunction) 3215 this = exp.If.from_arg_list(args) 3216 self.validate_expression(this, args) 3217 self._match_r_paren() 3218 else: 3219 condition = self._parse_conjunction() 3220 self._match(TokenType.THEN) 3221 true = self._parse_conjunction() 3222 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3223 self._match(TokenType.END) 3224 this = self.expression(exp.If, this=condition, true=true, false=false) 3225 3226 return self._parse_window(this) 3227 3228 def _parse_extract(self) -> exp.Expression: 3229 this = self._parse_function() or self._parse_var() or self._parse_type() 3230 3231 if 
self._match(TokenType.FROM): 3232 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3233 3234 if not self._match(TokenType.COMMA): 3235 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3236 3237 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3238 3239 def _parse_cast(self, strict: bool) -> exp.Expression: 3240 this = self._parse_conjunction() 3241 3242 if not self._match(TokenType.ALIAS): 3243 self.raise_error("Expected AS after CAST") 3244 3245 to = self._parse_types() 3246 3247 if not to: 3248 self.raise_error("Expected TYPE after CAST") 3249 elif to.this == exp.DataType.Type.CHAR: 3250 if self._match(TokenType.CHARACTER_SET): 3251 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3252 3253 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3254 3255 def _parse_string_agg(self) -> exp.Expression: 3256 expression: t.Optional[exp.Expression] 3257 3258 if self._match(TokenType.DISTINCT): 3259 args = self._parse_csv(self._parse_conjunction) 3260 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3261 else: 3262 args = self._parse_csv(self._parse_conjunction) 3263 expression = seq_get(args, 0) 3264 3265 index = self._index 3266 if not self._match(TokenType.R_PAREN): 3267 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3268 order = self._parse_order(this=expression) 3269 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3270 3271 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3272 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3273 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3274 if not self._match(TokenType.WITHIN_GROUP): 3275 self._retreat(index) 3276 this = exp.GroupConcat.from_arg_list(args) 3277 self.validate_expression(this, args) 3278 return this 3279 3280 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3281 order = self._parse_order(this=expression) 3282 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3283 3284 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3285 to: t.Optional[exp.Expression] 3286 this = self._parse_bitwise() 3287 3288 if self._match(TokenType.USING): 3289 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3290 elif self._match(TokenType.COMMA): 3291 to = self._parse_bitwise() 3292 else: 3293 to = None 3294 3295 # Swap the argument order if needed to produce the correct AST 3296 if self.CONVERT_TYPE_FIRST: 3297 this, to = to, this 3298 3299 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3300 3301 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3302 args = self._parse_csv(self._parse_bitwise) 3303 3304 if self._match(TokenType.IN): 3305 return self.expression( 3306 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3307 ) 3308 3309 if haystack_first: 3310 haystack = seq_get(args, 0) 3311 needle = seq_get(args, 1) 3312 else: 3313 needle = seq_get(args, 0) 3314 haystack = seq_get(args, 1) 3315 3316 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3317 3318 self.validate_expression(this, args) 3319 3320 return this 3321 3322 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3323 args = self._parse_csv(self._parse_table) 3324 return exp.JoinHint(this=func_name.upper(), expressions=args) 3325 3326 def _parse_substring(self) -> exp.Expression: 3327 # Postgres supports the form: substring(string [from int] [for int]) 3328 # https://www.postgresql.org/docs/9.1/functions-string.html @ 
Table 9-6 3329 3330 args = self._parse_csv(self._parse_bitwise) 3331 3332 if self._match(TokenType.FROM): 3333 args.append(self._parse_bitwise()) 3334 if self._match(TokenType.FOR): 3335 args.append(self._parse_bitwise()) 3336 3337 this = exp.Substring.from_arg_list(args) 3338 self.validate_expression(this, args) 3339 3340 return this 3341 3342 def _parse_trim(self) -> exp.Expression: 3343 # https://www.w3resource.com/sql/character-functions/trim.php 3344 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3345 3346 position = None 3347 collation = None 3348 3349 if self._match_set(self.TRIM_TYPES): 3350 position = self._prev.text.upper() 3351 3352 expression = self._parse_term() 3353 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3354 this = self._parse_term() 3355 else: 3356 this = expression 3357 expression = None 3358 3359 if self._match(TokenType.COLLATE): 3360 collation = self._parse_term() 3361 3362 return self.expression( 3363 exp.Trim, 3364 this=this, 3365 position=position, 3366 expression=expression, 3367 collation=collation, 3368 ) 3369 3370 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3371 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3372 3373 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3374 return self._parse_window(self._parse_id_var(), alias=True) 3375 3376 def _parse_window( 3377 self, this: t.Optional[exp.Expression], alias: bool = False 3378 ) -> t.Optional[exp.Expression]: 3379 if self._match(TokenType.FILTER): 3380 where = self._parse_wrapped(self._parse_where) 3381 this = self.expression(exp.Filter, this=this, expression=where) 3382 3383 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3384 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3385 if self._match(TokenType.WITHIN_GROUP): 3386 order = self._parse_wrapped(self._parse_order) 3387 this = self.expression(exp.WithinGroup, this=this, expression=order) 3388 3389 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3390 # Some dialects choose to implement and some do not. 3391 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3392 3393 # There is some code above in _parse_lambda that handles 3394 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3395 3396 # The below changes handle 3397 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3398 3399 # Oracle allows both formats 3400 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3401 # and Snowflake chose to do the same for familiarity 3402 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3403 if self._match(TokenType.IGNORE_NULLS): 3404 this = self.expression(exp.IgnoreNulls, this=this) 3405 elif self._match(TokenType.RESPECT_NULLS): 3406 this = self.expression(exp.RespectNulls, this=this) 3407 3408 # bigquery select from window x AS (partition by ...) 
3409 if alias: 3410 self._match(TokenType.ALIAS) 3411 elif not self._match(TokenType.OVER): 3412 return this 3413 3414 if not self._match(TokenType.L_PAREN): 3415 return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) 3416 3417 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3418 partition = self._parse_partition_by() 3419 order = self._parse_order() 3420 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3421 3422 if kind: 3423 self._match(TokenType.BETWEEN) 3424 start = self._parse_window_spec() 3425 self._match(TokenType.AND) 3426 end = self._parse_window_spec() 3427 3428 spec = self.expression( 3429 exp.WindowSpec, 3430 kind=kind, 3431 start=start["value"], 3432 start_side=start["side"], 3433 end=end["value"], 3434 end_side=end["side"], 3435 ) 3436 else: 3437 spec = None 3438 3439 self._match_r_paren() 3440 3441 return self.expression( 3442 exp.Window, 3443 this=this, 3444 partition_by=partition, 3445 order=order, 3446 spec=spec, 3447 alias=window_alias, 3448 ) 3449 3450 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3451 self._match(TokenType.BETWEEN) 3452 3453 return { 3454 "value": ( 3455 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3456 ) 3457 or self._parse_bitwise(), 3458 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3459 } 3460 3461 def _parse_alias( 3462 self, this: t.Optional[exp.Expression], explicit: bool = False 3463 ) -> t.Optional[exp.Expression]: 3464 any_token = self._match(TokenType.ALIAS) 3465 3466 if explicit and not any_token: 3467 return this 3468 3469 if self._match(TokenType.L_PAREN): 3470 aliases = self.expression( 3471 exp.Aliases, 3472 this=this, 3473 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3474 ) 3475 self._match_r_paren(aliases) 3476 return aliases 3477 3478 alias = self._parse_id_var(any_token) 3479 3480 if 
alias: 3481 return self.expression(exp.Alias, this=this, alias=alias) 3482 3483 return this 3484 3485 def _parse_id_var( 3486 self, 3487 any_token: bool = True, 3488 tokens: t.Optional[t.Collection[TokenType]] = None, 3489 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3490 ) -> t.Optional[exp.Expression]: 3491 identifier = self._parse_identifier() 3492 3493 if identifier: 3494 return identifier 3495 3496 prefix = "" 3497 3498 if prefix_tokens: 3499 while self._match_set(prefix_tokens): 3500 prefix += self._prev.text 3501 3502 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3503 quoted = self._prev.token_type == TokenType.STRING 3504 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3505 3506 return None 3507 3508 def _parse_string(self) -> t.Optional[exp.Expression]: 3509 if self._match(TokenType.STRING): 3510 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3511 return self._parse_placeholder() 3512 3513 def _parse_number(self) -> t.Optional[exp.Expression]: 3514 if self._match(TokenType.NUMBER): 3515 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3516 return self._parse_placeholder() 3517 3518 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3519 if self._match(TokenType.IDENTIFIER): 3520 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3521 return self._parse_placeholder() 3522 3523 def _parse_var( 3524 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 3525 ) -> t.Optional[exp.Expression]: 3526 if ( 3527 (any_token and self._advance_any()) 3528 or self._match(TokenType.VAR) 3529 or (self._match_set(tokens) if tokens else False) 3530 ): 3531 return self.expression(exp.Var, this=self._prev.text) 3532 return self._parse_placeholder() 3533 3534 def _advance_any(self) -> t.Optional[Token]: 3535 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 3536 self._advance() 3537 return 
self._prev 3538 return None 3539 3540 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 3541 return self._parse_var() or self._parse_string() 3542 3543 def _parse_null(self) -> t.Optional[exp.Expression]: 3544 if self._match(TokenType.NULL): 3545 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 3546 return None 3547 3548 def _parse_boolean(self) -> t.Optional[exp.Expression]: 3549 if self._match(TokenType.TRUE): 3550 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 3551 if self._match(TokenType.FALSE): 3552 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 3553 return None 3554 3555 def _parse_star(self) -> t.Optional[exp.Expression]: 3556 if self._match(TokenType.STAR): 3557 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 3558 return None 3559 3560 def _parse_parameter(self) -> exp.Expression: 3561 wrapped = self._match(TokenType.L_BRACE) 3562 this = self._parse_var() or self._parse_primary() 3563 self._match(TokenType.R_BRACE) 3564 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 3565 3566 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 3567 if self._match_set(self.PLACEHOLDER_PARSERS): 3568 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 3569 if placeholder: 3570 return placeholder 3571 self._advance(-1) 3572 return None 3573 3574 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3575 if not self._match(TokenType.EXCEPT): 3576 return None 3577 if self._match(TokenType.L_PAREN, advance=False): 3578 return self._parse_wrapped_csv(self._parse_column) 3579 return self._parse_csv(self._parse_column) 3580 3581 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3582 if not self._match(TokenType.REPLACE): 3583 return None 3584 if self._match(TokenType.L_PAREN, advance=False): 3585 return self._parse_wrapped_csv(self._parse_expression) 3586 return self._parse_csv(self._parse_expression) 3587 3588 def 
_parse_csv( 3589 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3590 ) -> t.List[t.Optional[exp.Expression]]: 3591 parse_result = parse_method() 3592 items = [parse_result] if parse_result is not None else [] 3593 3594 while self._match(sep): 3595 if parse_result and self._prev_comments: 3596 parse_result.comments = self._prev_comments 3597 3598 parse_result = parse_method() 3599 if parse_result is not None: 3600 items.append(parse_result) 3601 3602 return items 3603 3604 def _parse_tokens( 3605 self, parse_method: t.Callable, expressions: t.Dict 3606 ) -> t.Optional[exp.Expression]: 3607 this = parse_method() 3608 3609 while self._match_set(expressions): 3610 this = self.expression( 3611 expressions[self._prev.token_type], 3612 this=this, 3613 comments=self._prev_comments, 3614 expression=parse_method(), 3615 ) 3616 3617 return this 3618 3619 def _parse_wrapped_id_vars(self) -> t.List[t.Optional[exp.Expression]]: 3620 return self._parse_wrapped_csv(self._parse_id_var) 3621 3622 def _parse_wrapped_csv( 3623 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 3624 ) -> t.List[t.Optional[exp.Expression]]: 3625 return self._parse_wrapped(lambda: self._parse_csv(parse_method, sep=sep)) 3626 3627 def _parse_wrapped(self, parse_method: t.Callable) -> t.Any: 3628 self._match_l_paren() 3629 parse_result = parse_method() 3630 self._match_r_paren() 3631 return parse_result 3632 3633 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 3634 return self._parse_select() or self._parse_expression() 3635 3636 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 3637 return self._parse_set_operations( 3638 self._parse_select(nested=True, parse_subquery_alias=False) 3639 ) 3640 3641 def _parse_transaction(self) -> exp.Expression: 3642 this = None 3643 if self._match_texts(self.TRANSACTION_KIND): 3644 this = self._prev.text 3645 3646 self._match_texts({"TRANSACTION", "WORK"}) 3647 3648 modes = [] 3649 while True: 3650 mode = [] 
3651 while self._match(TokenType.VAR): 3652 mode.append(self._prev.text) 3653 3654 if mode: 3655 modes.append(" ".join(mode)) 3656 if not self._match(TokenType.COMMA): 3657 break 3658 3659 return self.expression(exp.Transaction, this=this, modes=modes) 3660 3661 def _parse_commit_or_rollback(self) -> exp.Expression: 3662 chain = None 3663 savepoint = None 3664 is_rollback = self._prev.token_type == TokenType.ROLLBACK 3665 3666 self._match_texts({"TRANSACTION", "WORK"}) 3667 3668 if self._match_text_seq("TO"): 3669 self._match_text_seq("SAVEPOINT") 3670 savepoint = self._parse_id_var() 3671 3672 if self._match(TokenType.AND): 3673 chain = not self._match_text_seq("NO") 3674 self._match_text_seq("CHAIN") 3675 3676 if is_rollback: 3677 return self.expression(exp.Rollback, savepoint=savepoint) 3678 return self.expression(exp.Commit, chain=chain) 3679 3680 def _parse_add_column(self) -> t.Optional[exp.Expression]: 3681 if not self._match_text_seq("ADD"): 3682 return None 3683 3684 self._match(TokenType.COLUMN) 3685 exists_column = self._parse_exists(not_=True) 3686 expression = self._parse_column_def(self._parse_field(any_token=True)) 3687 3688 if expression: 3689 expression.set("exists", exists_column) 3690 3691 return expression 3692 3693 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 3694 return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") 3695 3696 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 3697 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 3698 return self.expression( 3699 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 3700 ) 3701 3702 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 3703 this = None 3704 kind = self._prev.token_type 3705 3706 if kind == TokenType.CONSTRAINT: 3707 this = self._parse_id_var() 3708 3709 if self._match_text_seq("CHECK"): 3710 expression = 
self._parse_wrapped(self._parse_conjunction) 3711 enforced = self._match_text_seq("ENFORCED") 3712 3713 return self.expression( 3714 exp.AddConstraint, this=this, expression=expression, enforced=enforced 3715 ) 3716 3717 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 3718 expression = self._parse_foreign_key() 3719 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 3720 expression = self._parse_primary_key() 3721 3722 return self.expression(exp.AddConstraint, this=this, expression=expression) 3723 3724 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 3725 index = self._index - 1 3726 3727 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 3728 return self._parse_csv(self._parse_add_constraint) 3729 3730 self._retreat(index) 3731 return self._parse_csv(self._parse_add_column) 3732 3733 def _parse_alter_table_alter(self) -> exp.Expression: 3734 self._match(TokenType.COLUMN) 3735 column = self._parse_field(any_token=True) 3736 3737 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 3738 return self.expression(exp.AlterColumn, this=column, drop=True) 3739 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 3740 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 3741 3742 self._match_text_seq("SET", "DATA") 3743 return self.expression( 3744 exp.AlterColumn, 3745 this=column, 3746 dtype=self._match_text_seq("TYPE") and self._parse_types(), 3747 collate=self._match(TokenType.COLLATE) and self._parse_term(), 3748 using=self._match(TokenType.USING) and self._parse_conjunction(), 3749 ) 3750 3751 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 3752 index = self._index - 1 3753 3754 partition_exists = self._parse_exists() 3755 if self._match(TokenType.PARTITION, advance=False): 3756 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 3757 3758 self._retreat(index) 3759 return 
self._parse_csv(self._parse_drop_column) 3760 3761 def _parse_alter_table_rename(self) -> exp.Expression: 3762 self._match_text_seq("TO") 3763 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 3764 3765 def _parse_alter(self) -> t.Optional[exp.Expression]: 3766 start = self._prev 3767 3768 if not self._match(TokenType.TABLE): 3769 return self._parse_as_command(start) 3770 3771 exists = self._parse_exists() 3772 this = self._parse_table(schema=True) 3773 3774 if self._next: 3775 self._advance() 3776 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 3777 3778 if parser: 3779 return self.expression( 3780 exp.AlterTable, 3781 this=this, 3782 exists=exists, 3783 actions=ensure_list(parser(self)), 3784 ) 3785 return self._parse_as_command(start) 3786 3787 def _parse_show(self) -> t.Optional[exp.Expression]: 3788 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 3789 if parser: 3790 return parser(self) 3791 self._advance() 3792 return self.expression(exp.Show, this=self._prev.text.upper()) 3793 3794 def _default_parse_set_item(self) -> exp.Expression: 3795 return self.expression( 3796 exp.SetItem, 3797 this=self._parse_statement(), 3798 ) 3799 3800 def _parse_set_item(self) -> t.Optional[exp.Expression]: 3801 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 3802 return parser(self) if parser else self._default_parse_set_item() 3803 3804 def _parse_merge(self) -> exp.Expression: 3805 self._match(TokenType.INTO) 3806 target = self._parse_table() 3807 3808 self._match(TokenType.USING) 3809 using = self._parse_table() 3810 3811 self._match(TokenType.ON) 3812 on = self._parse_conjunction() 3813 3814 whens = [] 3815 while self._match(TokenType.WHEN): 3816 matched = not self._match(TokenType.NOT) 3817 self._match_text_seq("MATCHED") 3818 source = ( 3819 False 3820 if self._match_text_seq("BY", "TARGET") 3821 else self._match_text_seq("BY", "SOURCE") 3822 ) 3823 
condition = self._parse_conjunction() if self._match(TokenType.AND) else None 3824 3825 self._match(TokenType.THEN) 3826 3827 if self._match(TokenType.INSERT): 3828 _this = self._parse_star() 3829 if _this: 3830 then = self.expression(exp.Insert, this=_this) 3831 else: 3832 then = self.expression( 3833 exp.Insert, 3834 this=self._parse_value(), 3835 expression=self._match(TokenType.VALUES) and self._parse_value(), 3836 ) 3837 elif self._match(TokenType.UPDATE): 3838 expressions = self._parse_star() 3839 if expressions: 3840 then = self.expression(exp.Update, expressions=expressions) 3841 else: 3842 then = self.expression( 3843 exp.Update, 3844 expressions=self._match(TokenType.SET) 3845 and self._parse_csv(self._parse_equality), 3846 ) 3847 elif self._match(TokenType.DELETE): 3848 then = self.expression(exp.Var, this=self._prev.text) 3849 else: 3850 then = None 3851 3852 whens.append( 3853 self.expression( 3854 exp.When, 3855 matched=matched, 3856 source=source, 3857 condition=condition, 3858 then=then, 3859 ) 3860 ) 3861 3862 return self.expression( 3863 exp.Merge, 3864 this=target, 3865 using=using, 3866 on=on, 3867 expressions=whens, 3868 ) 3869 3870 def _parse_set(self) -> exp.Expression: 3871 return self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 3872 3873 def _parse_as_command(self, start: Token) -> exp.Command: 3874 while self._curr: 3875 self._advance() 3876 text = self._find_sql(start, self._prev) 3877 size = len(start.text) 3878 return exp.Command(this=text[:size], expression=text[size:]) 3879 3880 def _find_parser( 3881 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 3882 ) -> t.Optional[t.Callable]: 3883 index = self._index 3884 this = [] 3885 while True: 3886 # The current token might be multiple words 3887 curr = self._curr.text.upper() 3888 key = curr.split(" ") 3889 this.append(curr) 3890 self._advance() 3891 result, trie = in_trie(trie, key) 3892 if result == 0: 3893 break 3894 if result == 2: 3895 subparser = 
parsers[" ".join(this)] 3896 return subparser 3897 self._retreat(index) 3898 return None 3899 3900 def _match(self, token_type, advance=True): 3901 if not self._curr: 3902 return None 3903 3904 if self._curr.token_type == token_type: 3905 if advance: 3906 self._advance() 3907 return True 3908 3909 return None 3910 3911 def _match_set(self, types, advance=True): 3912 if not self._curr: 3913 return None 3914 3915 if self._curr.token_type in types: 3916 if advance: 3917 self._advance() 3918 return True 3919 3920 return None 3921 3922 def _match_pair(self, token_type_a, token_type_b, advance=True): 3923 if not self._curr or not self._next: 3924 return None 3925 3926 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 3927 if advance: 3928 self._advance(2) 3929 return True 3930 3931 return None 3932 3933 def _match_l_paren(self, expression=None): 3934 if not self._match(TokenType.L_PAREN): 3935 self.raise_error("Expecting (") 3936 if expression and self._prev_comments: 3937 expression.comments = self._prev_comments 3938 3939 def _match_r_paren(self, expression=None): 3940 if not self._match(TokenType.R_PAREN): 3941 self.raise_error("Expecting )") 3942 if expression and self._prev_comments: 3943 expression.comments = self._prev_comments 3944 3945 def _match_texts(self, texts, advance=True): 3946 if self._curr and self._curr.text.upper() in texts: 3947 if advance: 3948 self._advance() 3949 return True 3950 return False 3951 3952 def _match_text_seq(self, *texts, advance=True): 3953 index = self._index 3954 for text in texts: 3955 if self._curr and self._curr.text.upper() == text: 3956 self._advance() 3957 else: 3958 self._retreat(index) 3959 return False 3960 3961 if not advance: 3962 self._retreat(index) 3963 3964 return True 3965 3966 def _replace_columns_with_dots(self, this): 3967 if isinstance(this, exp.Dot): 3968 exp.replace_children(this, self._replace_columns_with_dots) 3969 elif isinstance(this, exp.Column): 3970 
exp.replace_children(this, self._replace_columns_with_dots) 3971 table = this.args.get("table") 3972 this = ( 3973 self.expression(exp.Dot, this=table, expression=this.this) 3974 if table 3975 else self.expression(exp.Var, this=this.name) 3976 ) 3977 elif isinstance(this, exp.Identifier): 3978 this = self.expression(exp.Var, this=this.name) 3979 return this 3980 3981 def _replace_lambda(self, node, lambda_variables): 3982 if isinstance(node, exp.Column): 3983 if node.name in lambda_variables: 3984 return node.this 3985 return node
Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE (the constructor falls back to ErrorLevel.IMMEDIATE when no level is supplied).
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
730 def __init__( 731 self, 732 error_level: t.Optional[ErrorLevel] = None, 733 error_message_context: int = 100, 734 index_offset: int = 0, 735 unnest_column_only: bool = False, 736 alias_post_tablesample: bool = False, 737 max_errors: int = 3, 738 null_ordering: t.Optional[str] = None, 739 ): 740 self.error_level = error_level or ErrorLevel.IMMEDIATE 741 self.error_message_context = error_message_context 742 self.index_offset = index_offset 743 self.unnest_column_only = unnest_column_only 744 self.alias_post_tablesample = alias_post_tablesample 745 self.max_errors = max_errors 746 self.null_ordering = null_ordering 747 self.reset()
759 def parse( 760 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 761 ) -> t.List[t.Optional[exp.Expression]]: 762 """ 763 Parses a list of tokens and returns a list of syntax trees, one tree 764 per parsed SQL statement. 765 766 Args: 767 raw_tokens: the list of tokens. 768 sql: the original SQL string, used to produce helpful debug messages. 769 770 Returns: 771 The list of syntax trees. 772 """ 773 return self._parse( 774 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 775 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
777 def parse_into( 778 self, 779 expression_types: exp.IntoType, 780 raw_tokens: t.List[Token], 781 sql: t.Optional[str] = None, 782 ) -> t.List[t.Optional[exp.Expression]]: 783 """ 784 Parses a list of tokens into a given Expression type. If a collection of Expression 785 types is given instead, this method will try to parse the token list into each one 786 of them, stopping at the first for which the parsing succeeds. 787 788 Args: 789 expression_types: the expression type(s) to try and parse the token list into. 790 raw_tokens: the list of tokens. 791 sql: the original SQL string, used to produce helpful debug messages. 792 793 Returns: 794 The target Expression. 795 """ 796 errors = [] 797 for expression_type in ensure_collection(expression_types): 798 parser = self.EXPRESSION_PARSERS.get(expression_type) 799 if not parser: 800 raise TypeError(f"No parser registered for {expression_type}") 801 try: 802 return self._parse(parser, raw_tokens, sql) 803 except ParseError as e: 804 e.errors[0]["into_expression"] = expression_type 805 errors.append(e) 806 raise ParseError( 807 f"Failed to parse into {expression_types}", 808 errors=merge_errors(errors), 809 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
845 def check_errors(self) -> None: 846 """ 847 Logs or raises any found errors, depending on the chosen error level setting. 848 """ 849 if self.error_level == ErrorLevel.WARN: 850 for error in self.errors: 851 logger.error(str(error)) 852 elif self.error_level == ErrorLevel.RAISE and self.errors: 853 raise ParseError( 854 concat_messages(self.errors, self.max_errors), 855 errors=merge_errors(self.errors), 856 )
Logs or raises any found errors, depending on the chosen error level setting.
858 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 859 """ 860 Appends an error in the list of recorded errors or raises it, depending on the chosen 861 error level setting. 862 """ 863 token = token or self._curr or self._prev or Token.string("") 864 start = self._find_token(token) 865 end = start + len(token.text) 866 start_context = self.sql[max(start - self.error_message_context, 0) : start] 867 highlight = self.sql[start:end] 868 end_context = self.sql[end : end + self.error_message_context] 869 870 error = ParseError.new( 871 f"{message}. Line {token.line}, Col: {token.col}.\n" 872 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 873 description=message, 874 line=token.line, 875 col=token.col, 876 start_context=start_context, 877 highlight=highlight, 878 end_context=end_context, 879 ) 880 881 if self.error_level == ErrorLevel.IMMEDIATE: 882 raise error 883 884 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
886 def expression( 887 self, exp_class: t.Type[exp.Expression], comments: t.Optional[t.List[str]] = None, **kwargs 888 ) -> exp.Expression: 889 """ 890 Creates a new, validated Expression. 891 892 Args: 893 exp_class: the expression class to instantiate. 894 comments: an optional list of comments to attach to the expression. 895 kwargs: the arguments to set for the expression along with their respective values. 896 897 Returns: 898 The target expression. 899 """ 900 instance = exp_class(**kwargs) 901 if self._prev_comments: 902 instance.comments = self._prev_comments 903 self._prev_comments = None 904 if comments: 905 instance.comments = comments 906 self.validate_expression(instance) 907 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
909 def validate_expression( 910 self, expression: exp.Expression, args: t.Optional[t.List] = None 911 ) -> None: 912 """ 913 Validates an already instantiated expression, making sure that all its mandatory arguments 914 are set. 915 916 Args: 917 expression: the expression to validate. 918 args: an optional list of items that was used to instantiate the expression, if it's a Func. 919 """ 920 if self.error_level == ErrorLevel.IGNORE: 921 return 922 923 for error_message in expression.error_messages(args): 924 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.